@nxtedition/rocksdb 11.0.3 → 11.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134):
  1. package/binding.cc +147 -125
  2. package/deps/rocksdb/rocksdb/db/column_family_test.cc +15 -7
  3. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
  4. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -4
  5. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +11 -7
  6. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +17 -11
  7. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +15 -0
  8. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +155 -0
  9. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +564 -461
  10. package/deps/rocksdb/rocksdb/db/db_follower_test.cc +8 -4
  11. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +40 -24
  12. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +8 -1
  13. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -4
  14. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  15. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -1
  16. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +19 -1
  17. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +20 -16
  18. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +27 -0
  19. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +10 -2
  20. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +85 -0
  21. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +55 -2
  22. package/deps/rocksdb/rocksdb/db/db_test2.cc +231 -0
  23. package/deps/rocksdb/rocksdb/db/db_test_util.cc +5 -0
  24. package/deps/rocksdb/rocksdb/db/db_test_util.h +10 -1
  25. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +0 -1
  26. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +175 -1
  27. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +64 -0
  28. package/deps/rocksdb/rocksdb/db/dbformat.h +5 -6
  29. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +8 -8
  30. package/deps/rocksdb/rocksdb/db/experimental.cc +3 -2
  31. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +2 -4
  32. package/deps/rocksdb/rocksdb/db/flush_job.cc +7 -2
  33. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +4 -2
  34. package/deps/rocksdb/rocksdb/db/listener_test.cc +5 -5
  35. package/deps/rocksdb/rocksdb/db/log_writer.cc +12 -3
  36. package/deps/rocksdb/rocksdb/db/memtable.cc +83 -23
  37. package/deps/rocksdb/rocksdb/db/memtable.h +11 -3
  38. package/deps/rocksdb/rocksdb/db/memtable_list.cc +7 -5
  39. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +21 -0
  40. package/deps/rocksdb/rocksdb/db/version_builder.cc +462 -33
  41. package/deps/rocksdb/rocksdb/db/version_builder.h +70 -23
  42. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +95 -207
  43. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +54 -35
  44. package/deps/rocksdb/rocksdb/db/version_set.cc +13 -11
  45. package/deps/rocksdb/rocksdb/db/version_set_test.cc +313 -59
  46. package/deps/rocksdb/rocksdb/db/write_batch.cc +124 -64
  47. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +2 -3
  48. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +1 -1
  49. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +4 -1
  50. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +9 -0
  51. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +4 -32
  52. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -3
  53. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +60 -172
  54. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +57 -2
  55. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +23 -15
  56. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +2 -3
  57. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.cc +1 -1
  58. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +4 -1
  59. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +200 -92
  60. package/deps/rocksdb/rocksdb/env/file_system.cc +3 -3
  61. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +124 -23
  62. package/deps/rocksdb/rocksdb/file/delete_scheduler.h +61 -8
  63. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +141 -2
  64. package/deps/rocksdb/rocksdb/file/file_util.cc +17 -2
  65. package/deps/rocksdb/rocksdb/file/file_util.h +10 -0
  66. package/deps/rocksdb/rocksdb/file/filename.cc +11 -3
  67. package/deps/rocksdb/rocksdb/file/filename.h +2 -1
  68. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +18 -0
  69. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +27 -4
  70. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +8 -1
  71. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +8 -13
  72. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +4 -0
  73. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +5 -0
  74. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -2
  75. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +2 -1
  76. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +34 -0
  77. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +25 -1
  78. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
  79. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +27 -9
  80. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +2 -0
  81. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +12 -0
  82. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
  83. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  84. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +29 -1
  85. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +102 -33
  86. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +46 -3
  87. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +4 -0
  88. package/deps/rocksdb/rocksdb/options/cf_options.cc +6 -0
  89. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  90. package/deps/rocksdb/rocksdb/options/db_options.cc +15 -1
  91. package/deps/rocksdb/rocksdb/options/db_options.h +2 -0
  92. package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -0
  93. package/deps/rocksdb/rocksdb/options/options_parser.cc +3 -2
  94. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -2
  95. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +75 -35
  96. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -0
  97. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +4 -0
  98. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +8 -1
  99. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +40 -15
  100. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +98 -17
  101. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +14 -2
  102. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +21 -91
  103. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +13 -21
  104. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +14 -5
  105. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +62 -53
  106. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +60 -38
  107. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +175 -78
  108. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +65 -36
  109. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +25 -15
  110. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +13 -1
  111. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +18 -4
  112. package/deps/rocksdb/rocksdb/table/meta_blocks.h +4 -0
  113. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +11 -0
  114. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +2 -2
  115. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +47 -18
  116. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +1 -2
  117. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +95 -0
  118. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +26 -15
  119. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +62 -19
  120. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +73 -34
  121. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +5 -0
  122. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +10 -3
  123. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +2 -1
  124. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +8 -5
  125. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +7 -4
  126. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +225 -0
  127. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +2 -1
  128. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +17 -0
  129. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +5 -2
  130. package/index.js +5 -17
  131. package/iterator.js +1 -1
  132. package/package.json +1 -1
  133. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  134. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
package/binding.cc CHANGED
@@ -37,7 +37,7 @@ class NullLogger : public rocksdb::Logger {
37
37
  };
38
38
 
39
39
  struct Database;
40
- struct Iterator;
40
+ class Iterator;
41
41
 
42
42
  struct ColumnFamily {
43
43
  rocksdb::ColumnFamilyHandle* handle;
@@ -285,10 +285,7 @@ struct BaseIterator : public Closable {
285
285
  const int limit,
286
286
  const bool fillCache,
287
287
  bool tailing = false)
288
- : database_(database),
289
- column_(column),
290
- reverse_(reverse),
291
- limit_(limit) {
288
+ : database_(database), column_(column), reverse_(reverse), limit_(limit) {
292
289
  if (lte) {
293
290
  upper_bound_ = rocksdb::PinnableSlice();
294
291
  *upper_bound_->GetSelf() = std::move(*lte) + '\0';
@@ -310,12 +307,15 @@ struct BaseIterator : public Closable {
310
307
  }
311
308
 
312
309
  rocksdb::ReadOptions readOptions;
310
+
313
311
  if (upper_bound_) {
314
312
  readOptions.iterate_upper_bound = &*upper_bound_;
315
313
  }
314
+
316
315
  if (lower_bound_) {
317
316
  readOptions.iterate_lower_bound = &*lower_bound_;
318
317
  }
318
+
319
319
  readOptions.fill_cache = fillCache;
320
320
  readOptions.async_io = true;
321
321
  readOptions.adaptive_readahead = true;
@@ -332,9 +332,15 @@ struct BaseIterator : public Closable {
332
332
  database_->Attach(this);
333
333
  }
334
334
 
335
- virtual ~BaseIterator() { assert(!iterator_); }
335
+ virtual ~BaseIterator() {
336
+ if (iterator_) {
337
+ database_->Detach(this);
338
+ }
339
+ }
336
340
 
337
341
  virtual void Seek(const rocksdb::Slice& target) {
342
+ assert(iterator_);
343
+
338
344
  if ((upper_bound_ && target.compare(*upper_bound_) >= 0) || (lower_bound_ && target.compare(*lower_bound_) < 0)) {
339
345
  // TODO (fix): Why is this required? Seek should handle it?
340
346
  // https://github.com/facebook/rocksdb/issues/9904
@@ -350,8 +356,10 @@ struct BaseIterator : public Closable {
350
356
  }
351
357
 
352
358
  virtual rocksdb::Status Close() override {
353
- iterator_.reset();
354
- database_->Detach(this);
359
+ if (iterator_) {
360
+ iterator_.reset();
361
+ database_->Detach(this);
362
+ }
355
363
  return rocksdb::Status::OK();
356
364
  }
357
365
 
@@ -393,7 +401,6 @@ struct BaseIterator : public Closable {
393
401
  rocksdb::ColumnFamilyHandle* column_;
394
402
 
395
403
  private:
396
-
397
404
  int count_ = 0;
398
405
  std::optional<rocksdb::PinnableSlice> lower_bound_;
399
406
  std::optional<rocksdb::PinnableSlice> upper_bound_;
@@ -402,7 +409,15 @@ struct BaseIterator : public Closable {
402
409
  const int limit_;
403
410
  };
404
411
 
405
- struct Iterator final : public BaseIterator {
412
+ class Iterator final : public BaseIterator {
413
+ const bool keys_;
414
+ const bool values_;
415
+ const size_t highWaterMarkBytes_;
416
+ bool first_ = true;
417
+ const Encoding keyEncoding_;
418
+ const Encoding valueEncoding_;
419
+
420
+ public:
406
421
  Iterator(Database* database,
407
422
  rocksdb::ColumnFamilyHandle* column,
408
423
  const bool reverse,
@@ -430,12 +445,115 @@ struct Iterator final : public BaseIterator {
430
445
  return BaseIterator::Seek(target);
431
446
  }
432
447
 
433
- const bool keys_;
434
- const bool values_;
435
- const size_t highWaterMarkBytes_;
436
- bool first_ = true;
437
- const Encoding keyEncoding_;
438
- const Encoding valueEncoding_;
448
+ static std::unique_ptr<Iterator> create(napi_env env, napi_value db, napi_value options) {
449
+ Database* database;
450
+ NAPI_STATUS_THROWS(napi_get_value_external(env, db, reinterpret_cast<void**>(&database)));
451
+
452
+ bool reverse = false;
453
+ NAPI_STATUS_THROWS(GetProperty(env, options, "reverse", reverse));
454
+
455
+ bool keys = true;
456
+ NAPI_STATUS_THROWS(GetProperty(env, options, "keys", keys));
457
+
458
+ bool values = true;
459
+ NAPI_STATUS_THROWS(GetProperty(env, options, "values", values));
460
+
461
+ bool tailing = false;
462
+ NAPI_STATUS_THROWS(GetProperty(env, options, "tailing", tailing));
463
+
464
+ bool fillCache = false;
465
+ NAPI_STATUS_THROWS(GetProperty(env, options, "fillCache", fillCache));
466
+
467
+ int32_t limit = -1;
468
+ NAPI_STATUS_THROWS(GetProperty(env, options, "limit", limit));
469
+
470
+ int32_t highWaterMarkBytes = 64 * 1024;
471
+ NAPI_STATUS_THROWS(GetProperty(env, options, "highWaterMarkBytes", highWaterMarkBytes));
472
+
473
+ std::optional<std::string> lt;
474
+ NAPI_STATUS_THROWS(GetProperty(env, options, "lt", lt));
475
+
476
+ std::optional<std::string> lte;
477
+ NAPI_STATUS_THROWS(GetProperty(env, options, "lte", lte));
478
+
479
+ std::optional<std::string> gt;
480
+ NAPI_STATUS_THROWS(GetProperty(env, options, "gt", gt));
481
+
482
+ std::optional<std::string> gte;
483
+ NAPI_STATUS_THROWS(GetProperty(env, options, "gte", gte));
484
+
485
+ rocksdb::ColumnFamilyHandle* column = database->db->DefaultColumnFamily();
486
+ NAPI_STATUS_THROWS(GetProperty(env, options, "column", column));
487
+
488
+ Encoding keyEncoding;
489
+ NAPI_STATUS_THROWS(GetProperty(env, options, "keyEncoding", keyEncoding));
490
+
491
+ Encoding valueEncoding;
492
+ NAPI_STATUS_THROWS(GetProperty(env, options, "valueEncoding", valueEncoding));
493
+
494
+ return std::make_unique<Iterator>(database, column, reverse, keys, values, limit, lt, lte, gt, gte, fillCache,
495
+ highWaterMarkBytes, tailing, keyEncoding, valueEncoding);
496
+ }
497
+
498
+ napi_value nextv(napi_env env, uint32_t count) {
499
+ napi_value finished;
500
+ NAPI_STATUS_THROWS(napi_get_boolean(env, false, &finished));
501
+
502
+ napi_value rows;
503
+ NAPI_STATUS_THROWS(napi_create_array(env, &rows));
504
+
505
+ size_t idx = 0;
506
+ size_t bytesRead = 0;
507
+ while (true) {
508
+ if (!first_) {
509
+ Next();
510
+ } else {
511
+ first_ = false;
512
+ }
513
+
514
+ if (!Valid() || !Increment()) {
515
+ ROCKS_STATUS_THROWS_NAPI(Status());
516
+ NAPI_STATUS_THROWS(napi_get_boolean(env, true, &finished));
517
+ break;
518
+ }
519
+
520
+ napi_value key;
521
+ napi_value val;
522
+
523
+ if (keys_ && values_) {
524
+ const auto k = CurrentKey();
525
+ const auto v = CurrentValue();
526
+ NAPI_STATUS_THROWS(Convert(env, &k, keyEncoding_, key));
527
+ NAPI_STATUS_THROWS(Convert(env, &v, valueEncoding_, val));
528
+ bytesRead += k.size() + v.size();
529
+ } else if (keys_) {
530
+ const auto k = CurrentKey();
531
+ NAPI_STATUS_THROWS(Convert(env, &k, keyEncoding_, key));
532
+ NAPI_STATUS_THROWS(napi_get_undefined(env, &val));
533
+ bytesRead += k.size();
534
+ } else if (values_) {
535
+ const auto v = CurrentValue();
536
+ NAPI_STATUS_THROWS(napi_get_undefined(env, &key));
537
+ NAPI_STATUS_THROWS(Convert(env, &v, valueEncoding_, val));
538
+ bytesRead += v.size();
539
+ } else {
540
+ assert(false);
541
+ }
542
+
543
+ NAPI_STATUS_THROWS(napi_set_element(env, rows, idx++, key));
544
+ NAPI_STATUS_THROWS(napi_set_element(env, rows, idx++, val));
545
+
546
+ if (bytesRead > highWaterMarkBytes_ || idx / 2 >= count) {
547
+ break;
548
+ }
549
+ }
550
+
551
+ napi_value ret;
552
+ NAPI_STATUS_THROWS(napi_create_object(env, &ret));
553
+ NAPI_STATUS_THROWS(napi_set_named_property(env, ret, "rows", rows));
554
+ NAPI_STATUS_THROWS(napi_set_named_property(env, ret, "finished", finished));
555
+ return ret;
556
+ }
439
557
  };
440
558
 
441
559
  /**
@@ -525,6 +643,13 @@ NAPI_METHOD(db_get_location) {
525
643
  return result;
526
644
  }
527
645
 
646
+
647
+ NAPI_METHOD(db_query) {
648
+ NAPI_ARGV(2);
649
+
650
+ return Iterator::create(env, argv[0], argv[1])->nextv(env, std::numeric_limits<uint32_t>::max());
651
+ }
652
+
528
653
  template <typename T, typename U>
529
654
  napi_status InitOptions(napi_env env, T& columnOptions, const U& options) {
530
655
  rocksdb::ConfigOptions configOptions;
@@ -673,6 +798,7 @@ napi_status InitOptions(napi_env env, T& columnOptions, const U& options) {
673
798
 
674
799
  tableOptions.format_version = 5;
675
800
  tableOptions.checksum = rocksdb::kXXH3;
801
+ tableOptions.decouple_partitioned_filters = true;
676
802
 
677
803
  tableOptions.optimize_filters_for_memory = true;
678
804
  NAPI_STATUS_RETURN(GetProperty(env, options, "optimizeFiltersForMemory", tableOptions.optimize_filters_for_memory));
@@ -919,9 +1045,9 @@ NAPI_METHOD(db_get_many) {
919
1045
  for (auto n = 0; n < count; n++) {
920
1046
  napi_value row;
921
1047
  if (statuses[n].IsNotFound()) {
922
- NAPI_STATUS_THROWS(napi_get_null(env, &row));
923
- } else if (statuses[n].IsAborted()) {
924
1048
  NAPI_STATUS_THROWS(napi_get_undefined(env, &row));
1049
+ } else if (statuses[n].IsAborted()) {
1050
+ NAPI_STATUS_THROWS(napi_get_null(env, &row));
925
1051
  } else {
926
1052
  ROCKS_STATUS_THROWS_NAPI(statuses[n]);
927
1053
  NAPI_STATUS_THROWS(Convert(env, &values[n], valueEncoding, row));
@@ -1070,56 +1196,7 @@ NAPI_METHOD(db_get_latest_sequence) {
1070
1196
  NAPI_METHOD(iterator_init) {
1071
1197
  NAPI_ARGV(2);
1072
1198
 
1073
- Database* database;
1074
- NAPI_STATUS_THROWS(napi_get_value_external(env, argv[0], reinterpret_cast<void**>(&database)));
1075
-
1076
- const auto options = argv[1];
1077
-
1078
- bool reverse = false;
1079
- NAPI_STATUS_THROWS(GetProperty(env, options, "reverse", reverse));
1080
-
1081
- bool keys = true;
1082
- NAPI_STATUS_THROWS(GetProperty(env, options, "keys", keys));
1083
-
1084
- bool values = true;
1085
- NAPI_STATUS_THROWS(GetProperty(env, options, "values", values));
1086
-
1087
- bool tailing = false;
1088
- NAPI_STATUS_THROWS(GetProperty(env, options, "tailing", tailing));
1089
-
1090
- bool fillCache = false;
1091
- NAPI_STATUS_THROWS(GetProperty(env, options, "fillCache", fillCache));
1092
-
1093
- int32_t limit = -1;
1094
- NAPI_STATUS_THROWS(GetProperty(env, options, "limit", limit));
1095
-
1096
- int32_t highWaterMarkBytes = 64 * 1024;
1097
- NAPI_STATUS_THROWS(GetProperty(env, options, "highWaterMarkBytes", highWaterMarkBytes));
1098
-
1099
- std::optional<std::string> lt;
1100
- NAPI_STATUS_THROWS(GetProperty(env, options, "lt", lt));
1101
-
1102
- std::optional<std::string> lte;
1103
- NAPI_STATUS_THROWS(GetProperty(env, options, "lte", lte));
1104
-
1105
- std::optional<std::string> gt;
1106
- NAPI_STATUS_THROWS(GetProperty(env, options, "gt", gt));
1107
-
1108
- std::optional<std::string> gte;
1109
- NAPI_STATUS_THROWS(GetProperty(env, options, "gte", gte));
1110
-
1111
- rocksdb::ColumnFamilyHandle* column = database->db->DefaultColumnFamily();
1112
- NAPI_STATUS_THROWS(GetProperty(env, options, "column", column));
1113
-
1114
- Encoding keyEncoding;
1115
- NAPI_STATUS_THROWS(GetProperty(env, options, "keyEncoding", keyEncoding));
1116
-
1117
- Encoding valueEncoding;
1118
- NAPI_STATUS_THROWS(GetProperty(env, options, "valueEncoding", valueEncoding));
1119
-
1120
- auto iterator = std::unique_ptr<Iterator>(new Iterator(database, column, reverse, keys, values, limit, lt, lte, gt,
1121
- gte, fillCache, highWaterMarkBytes,
1122
- tailing, keyEncoding, valueEncoding));
1199
+ auto iterator = Iterator::create(env, argv[0], argv[1]);
1123
1200
 
1124
1201
  napi_value result;
1125
1202
  NAPI_STATUS_THROWS(napi_create_external(env, iterator.get(), Finalize<Iterator>, iterator.get(), &result));
@@ -1162,63 +1239,7 @@ NAPI_METHOD(iterator_nextv) {
1162
1239
  uint32_t count;
1163
1240
  NAPI_STATUS_THROWS(napi_get_value_uint32(env, argv[1], &count));
1164
1241
 
1165
- napi_value finished;
1166
- NAPI_STATUS_THROWS(napi_get_boolean(env, false, &finished));
1167
-
1168
- napi_value rows;
1169
- NAPI_STATUS_THROWS(napi_create_array(env, &rows));
1170
-
1171
- size_t idx = 0;
1172
- size_t bytesRead = 0;
1173
- while (true) {
1174
- if (!iterator->first_) {
1175
- iterator->Next();
1176
- } else {
1177
- iterator->first_ = false;
1178
- }
1179
-
1180
- if (!iterator->Valid() || !iterator->Increment()) {
1181
- ROCKS_STATUS_THROWS_NAPI(iterator->Status());
1182
- NAPI_STATUS_THROWS(napi_get_boolean(env, true, &finished));
1183
- break;
1184
- }
1185
-
1186
- napi_value key;
1187
- napi_value val;
1188
-
1189
- if (iterator->keys_ && iterator->values_) {
1190
- const auto k = iterator->CurrentKey();
1191
- const auto v = iterator->CurrentValue();
1192
- NAPI_STATUS_THROWS(Convert(env, &k, iterator->keyEncoding_, key));
1193
- NAPI_STATUS_THROWS(Convert(env, &v, iterator->valueEncoding_, val));
1194
- bytesRead += k.size() + v.size();
1195
- } else if (iterator->keys_) {
1196
- const auto k = iterator->CurrentKey();
1197
- NAPI_STATUS_THROWS(Convert(env, &k, iterator->keyEncoding_, key));
1198
- NAPI_STATUS_THROWS(napi_get_undefined(env, &val));
1199
- bytesRead += k.size();
1200
- } else if (iterator->values_) {
1201
- const auto v = iterator->CurrentValue();
1202
- NAPI_STATUS_THROWS(napi_get_undefined(env, &key));
1203
- NAPI_STATUS_THROWS(Convert(env, &v, iterator->valueEncoding_, val));
1204
- bytesRead += v.size();
1205
- } else {
1206
- assert(false);
1207
- }
1208
-
1209
- NAPI_STATUS_THROWS(napi_set_element(env, rows, idx++, key));
1210
- NAPI_STATUS_THROWS(napi_set_element(env, rows, idx++, val));
1211
-
1212
- if (bytesRead > iterator->highWaterMarkBytes_ || idx / 2 >= count) {
1213
- break;
1214
- }
1215
- }
1216
-
1217
- napi_value ret;
1218
- NAPI_STATUS_THROWS(napi_create_object(env, &ret));
1219
- NAPI_STATUS_THROWS(napi_set_named_property(env, ret, "rows", rows));
1220
- NAPI_STATUS_THROWS(napi_set_named_property(env, ret, "finished", finished));
1221
- return ret;
1242
+ return iterator->nextv(env, count);
1222
1243
  }
1223
1244
 
1224
1245
  NAPI_METHOD(batch_init) {
@@ -1403,6 +1424,7 @@ NAPI_INIT() {
1403
1424
  NAPI_EXPORT_FUNCTION(db_clear);
1404
1425
  NAPI_EXPORT_FUNCTION(db_get_property);
1405
1426
  NAPI_EXPORT_FUNCTION(db_get_latest_sequence);
1427
+ NAPI_EXPORT_FUNCTION(db_query);
1406
1428
 
1407
1429
  NAPI_EXPORT_FUNCTION(iterator_init);
1408
1430
  NAPI_EXPORT_FUNCTION(iterator_seek);
@@ -3067,12 +3067,20 @@ TEST_P(ColumnFamilyTest, CompactionSpeedupForMarkedFiles) {
3067
3067
  WaitForCompaction();
3068
3068
  AssertFilesPerLevel("0,1", 0 /* cf */);
3069
3069
 
3070
+ // We should calculate the limit by obtaining the number of env background
3071
+ // threads, because the current test case will share the same env
3072
+ // with another case that may have already increased the number of
3073
+ // background threads which is larger than kParallelismLimit
3074
+ const auto limit = env_->GetBackgroundThreads(Env::Priority::LOW);
3075
+
3070
3076
  // Block the compaction thread pool so marked files accumulate in L0.
3071
- test::SleepingBackgroundTask sleeping_tasks[kParallelismLimit];
3072
- for (int i = 0; i < kParallelismLimit; i++) {
3077
+ std::vector<std::shared_ptr<test::SleepingBackgroundTask>> sleeping_tasks;
3078
+ for (int i = 0; i < limit; i++) {
3079
+ sleeping_tasks.emplace_back(
3080
+ std::make_shared<test::SleepingBackgroundTask>());
3073
3081
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
3074
- &sleeping_tasks[i], Env::Priority::LOW);
3075
- sleeping_tasks[i].WaitUntilSleeping();
3082
+ sleeping_tasks[i].get(), Env::Priority::LOW);
3083
+ sleeping_tasks[i]->WaitUntilSleeping();
3076
3084
  }
3077
3085
 
3078
3086
  // Zero marked upper-level files. No speedup.
@@ -3091,9 +3099,9 @@ TEST_P(ColumnFamilyTest, CompactionSpeedupForMarkedFiles) {
3091
3099
  ASSERT_EQ(kParallelismLimit, dbfull()->TEST_BGCompactionsAllowed());
3092
3100
  AssertFilesPerLevel("2,1", 0 /* cf */);
3093
3101
 
3094
- for (int i = 0; i < kParallelismLimit; i++) {
3095
- sleeping_tasks[i].WakeUp();
3096
- sleeping_tasks[i].WaitUntilDone();
3102
+ for (int i = 0; i < limit; i++) {
3103
+ sleeping_tasks[i]->WakeUp();
3104
+ sleeping_tasks[i]->WaitUntilDone();
3097
3105
  }
3098
3106
  }
3099
3107
 
@@ -552,7 +552,8 @@ class CompactionJobTestBase : public testing::Test {
552
552
  /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"",
553
553
  /*error_handler=*/nullptr, /*read_only=*/false));
554
554
  compaction_job_stats_.Reset();
555
- ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_));
555
+ ASSERT_OK(
556
+ SetIdentityFile(WriteOptions(), env_, dbname_, Temperature::kUnknown));
556
557
 
557
558
  VersionEdit new_db;
558
559
  new_db.SetLogNumber(0);
@@ -575,7 +576,8 @@ class CompactionJobTestBase : public testing::Test {
575
576
  }
576
577
  ASSERT_OK(s);
577
578
  // Make "CURRENT" file that points to the new manifest file.
578
- s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
579
+ s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
580
+ Temperature::kUnknown, nullptr);
579
581
 
580
582
  ASSERT_OK(s);
581
583
 
@@ -925,11 +925,15 @@ bool LevelCompactionBuilder::PickSizeBasedIntraL0Compaction() {
925
925
  }
926
926
  uint64_t l0_size = 0;
927
927
  for (const auto& file : l0_files) {
928
- l0_size += file->fd.GetFileSize();
928
+ assert(file->compensated_file_size >= file->fd.GetFileSize());
929
+ // Compact down L0s with more deletions.
930
+ l0_size += file->compensated_file_size;
929
931
  }
930
- const uint64_t min_lbase_size =
931
- l0_size * static_cast<uint64_t>(std::max(
932
- 10.0, mutable_cf_options_.max_bytes_for_level_multiplier));
932
+
933
+ // Avoid L0->Lbase compactions that are inefficient for write-amp.
934
+ const double kMultiplier =
935
+ std::max(10.0, mutable_cf_options_.max_bytes_for_level_multiplier) * 2;
936
+ const uint64_t min_lbase_size = MultiplyCheckOverflow(l0_size, kMultiplier);
933
937
  assert(min_lbase_size >= l0_size);
934
938
  const std::vector<FileMetaData*>& lbase_files =
935
939
  vstorage_->LevelFiles(/*level=*/base_level);
@@ -214,7 +214,10 @@ class CompactionPickerTest : public CompactionPickerTestBase {
214
214
  explicit CompactionPickerTest()
215
215
  : CompactionPickerTestBase(BytewiseComparator()) {}
216
216
 
217
- ~CompactionPickerTest() override = default;
217
+ ~CompactionPickerTest() override {
218
+ SyncPoint::GetInstance()->ClearAllCallBacks();
219
+ SyncPoint::GetInstance()->DisableProcessing();
220
+ }
218
221
  };
219
222
 
220
223
  class CompactionPickerU64TsTest : public CompactionPickerTestBase {
@@ -4284,27 +4287,28 @@ TEST_F(CompactionPickerTest, IntraL0WhenL0IsSmall) {
4284
4287
  SCOPED_TRACE("lbase_size_multiplier=" +
4285
4288
  std::to_string(lbase_size_multiplier));
4286
4289
  NewVersionStorage(6, kCompactionStyleLevel);
4287
- // When L0 size is <= Lbase size / max_bytes_for_level_multiplier,
4290
+ // When L0 size is <= Lbase size / max_bytes_for_level_multiplier / 2,
4288
4291
  // intra-L0 compaction is picked. Otherwise, L0->L1
4289
4292
  // compaction is picked.
4293
+ // compensated_file_size will be used to compute total l0 size.
4290
4294
  Add(/*level=*/0, /*file_number=*/1U, /*smallest=*/"100",
4291
- /*largest=*/"200", /*file_size=*/1000, /*path_id=*/0,
4295
+ /*largest=*/"200", /*file_size=*/10, /*path_id=*/0,
4292
4296
  /*smallest_seq=*/10, /*largest_seq=*/11,
4293
4297
  /*compensated_file_size=*/1000);
4294
4298
  Add(/*level=*/0, /*file_number=*/2U, /*smallest=*/"100",
4295
- /*largest=*/"100", /*file_size=*/1000, /*path_id=*/0,
4299
+ /*largest=*/"100", /*file_size=*/10, /*path_id=*/0,
4296
4300
  /*smallest_seq=*/20, /*largest_seq=*/21,
4297
4301
  /*compensated_file_size=*/1000);
4298
4302
  Add(/*level=*/0, /*file_number=*/3U, /*smallest=*/"100",
4299
- /*largest=*/"200", /*file_size=*/1000, /*path_id=*/0,
4303
+ /*largest=*/"200", /*file_size=*/10, /*path_id=*/0,
4300
4304
  /*smallest_seq=*/30, /*largest_seq=*/31,
4301
4305
  /*compensated_file_size=*/1000);
4302
4306
  Add(/*level=*/0, /*file_number=*/4U, /*smallest=*/"100",
4303
- /*largest=*/"200", /*file_size=*/1000, /*path_id=*/0,
4307
+ /*largest=*/"200", /*file_size=*/10, /*path_id=*/0,
4304
4308
  /*smallest_seq=*/40, /*largest_seq=*/41,
4305
4309
  /*compensated_file_size=*/1000);
4306
4310
  const uint64_t l0_size = 4000;
4307
- const uint64_t lbase_size = l0_size * lbase_size_multiplier;
4311
+ const uint64_t lbase_size = l0_size * lbase_size_multiplier * 2;
4308
4312
  Add(/*level=*/1, /*file_number=*/5U, /*smallest=*/"100",
4309
4313
  /*largest=*/"200", /*file_size=*/lbase_size, /*path_id=*/0,
4310
4314
  /*smallest_seq=*/0, /*largest_seq=*/0,
@@ -140,9 +140,13 @@ CompactionJob::ProcessKeyValueCompactionWithCompactionService(
140
140
  return compaction_status;
141
141
  }
142
142
 
143
+ // CompactionServiceJobStatus::kSuccess was returned, but somehow we failed to
144
+ // read the result. Consider this as an installation failure
143
145
  if (!s.ok()) {
144
146
  sub_compact->status = s;
145
147
  compaction_result.status.PermitUncheckedError();
148
+ db_options_.compaction_service->OnInstallation(
149
+ response.scheduled_job_id, CompactionServiceJobStatus::kFailure);
146
150
  return CompactionServiceJobStatus::kFailure;
147
151
  }
148
152
  sub_compact->status = compaction_result.status;
@@ -154,18 +158,14 @@ CompactionJob::ProcessKeyValueCompactionWithCompactionService(
154
158
  is_first_one = false;
155
159
  }
156
160
 
157
- ROCKS_LOG_INFO(db_options_.info_log,
158
- "[%s] [JOB %d] Receive remote compaction result, output path: "
159
- "%s, files: %s",
160
- compaction_input.column_family.name.c_str(), job_id_,
161
- compaction_result.output_path.c_str(),
162
- output_files_oss.str().c_str());
163
-
164
- if (!s.ok()) {
165
- sub_compact->status = s;
166
- return CompactionServiceJobStatus::kFailure;
167
- }
161
+ ROCKS_LOG_INFO(
162
+ db_options_.info_log,
163
+ "[%s] [JOB %d] Received remote compaction result, output path: "
164
+ "%s, files: %s",
165
+ compaction_input.column_family.name.c_str(), job_id_,
166
+ compaction_result.output_path.c_str(), output_files_oss.str().c_str());
168
167
 
168
+ // Installation Starts
169
169
  for (const auto& file : compaction_result.output_files) {
170
170
  uint64_t file_num = versions_->NewFileNumber();
171
171
  auto src_file = compaction_result.output_path + "/" + file.file_name;
@@ -174,6 +174,8 @@ CompactionJob::ProcessKeyValueCompactionWithCompactionService(
174
174
  s = fs_->RenameFile(src_file, tgt_file, IOOptions(), nullptr);
175
175
  if (!s.ok()) {
176
176
  sub_compact->status = s;
177
+ db_options_.compaction_service->OnInstallation(
178
+ response.scheduled_job_id, CompactionServiceJobStatus::kFailure);
177
179
  return CompactionServiceJobStatus::kFailure;
178
180
  }
179
181
 
@@ -182,6 +184,8 @@ CompactionJob::ProcessKeyValueCompactionWithCompactionService(
182
184
  s = fs_->GetFileSize(tgt_file, IOOptions(), &file_size, nullptr);
183
185
  if (!s.ok()) {
184
186
  sub_compact->status = s;
187
+ db_options_.compaction_service->OnInstallation(
188
+ response.scheduled_job_id, CompactionServiceJobStatus::kFailure);
185
189
  return CompactionServiceJobStatus::kFailure;
186
190
  }
187
191
  meta.fd = FileDescriptor(file_num, compaction->output_path_id(), file_size,
@@ -206,6 +210,8 @@ CompactionJob::ProcessKeyValueCompactionWithCompactionService(
206
210
  RecordTick(stats_, REMOTE_COMPACT_READ_BYTES, compaction_result.bytes_read);
207
211
  RecordTick(stats_, REMOTE_COMPACT_WRITE_BYTES,
208
212
  compaction_result.bytes_written);
213
+ db_options_.compaction_service->OnInstallation(
214
+ response.scheduled_job_id, CompactionServiceJobStatus::kSuccess);
209
215
  return CompactionServiceJobStatus::kSuccess;
210
216
  }
211
217
 
@@ -108,6 +108,11 @@ class MyTestCompactionService : public CompactionService {
108
108
  }
109
109
  }
110
110
 
111
+ void OnInstallation(const std::string& /*scheduled_job_id*/,
112
+ CompactionServiceJobStatus status) override {
113
+ final_updated_status_ = status;
114
+ }
115
+
111
116
  int GetCompactionNum() { return compaction_num_.load(); }
112
117
 
113
118
  CompactionServiceJobInfo GetCompactionInfoForStart() { return start_info_; }
@@ -136,6 +141,10 @@ class MyTestCompactionService : public CompactionService {
136
141
 
137
142
  void SetCanceled(bool canceled) { canceled_ = canceled; }
138
143
 
144
+ CompactionServiceJobStatus GetFinalCompactionServiceJobStatus() {
145
+ return final_updated_status_.load();
146
+ }
147
+
139
148
  private:
140
149
  InstrumentedMutex mutex_;
141
150
  std::atomic_int compaction_num_{0};
@@ -158,6 +167,8 @@ class MyTestCompactionService : public CompactionService {
158
167
  std::vector<std::shared_ptr<TablePropertiesCollectorFactory>>
159
168
  table_properties_collector_factories_;
160
169
  std::atomic_bool canceled_{false};
170
+ std::atomic<CompactionServiceJobStatus> final_updated_status_{
171
+ CompactionServiceJobStatus::kUseLocal};
161
172
  };
162
173
 
163
174
  class CompactionServiceTest : public DBTestBase {
@@ -255,6 +266,8 @@ TEST_F(CompactionServiceTest, BasicCompactions) {
255
266
 
256
267
  auto my_cs = GetCompactionService();
257
268
  ASSERT_GE(my_cs->GetCompactionNum(), 1);
269
+ ASSERT_EQ(CompactionServiceJobStatus::kSuccess,
270
+ my_cs->GetFinalCompactionServiceJobStatus());
258
271
 
259
272
  // make sure the compaction statistics is only recorded on the remote side
260
273
  ASSERT_GE(compactor_statistics->getTickerCount(COMPACT_WRITE_BYTES), 1);
@@ -437,6 +450,8 @@ TEST_F(CompactionServiceTest, InvalidResult) {
437
450
  Slice end(end_str);
438
451
  Status s = db_->CompactRange(CompactRangeOptions(), &start, &end);
439
452
  ASSERT_FALSE(s.ok());
453
+ ASSERT_EQ(CompactionServiceJobStatus::kFailure,
454
+ my_cs->GetFinalCompactionServiceJobStatus());
440
455
  }
441
456
 
442
457
  TEST_F(CompactionServiceTest, SubCompaction) {