@nxtedition/rocksdb 11.0.2 → 11.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. package/binding.cc +133 -122
  2. package/deps/rocksdb/rocksdb/db/column_family_test.cc +15 -7
  3. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
  4. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -4
  5. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +11 -7
  6. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +17 -11
  7. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +15 -0
  8. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +155 -0
  9. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +564 -461
  10. package/deps/rocksdb/rocksdb/db/db_follower_test.cc +8 -4
  11. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +40 -24
  12. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +8 -1
  13. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -4
  14. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  15. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -1
  16. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +19 -1
  17. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +20 -16
  18. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +27 -0
  19. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +10 -2
  20. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +85 -0
  21. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +55 -2
  22. package/deps/rocksdb/rocksdb/db/db_test2.cc +231 -0
  23. package/deps/rocksdb/rocksdb/db/db_test_util.cc +5 -0
  24. package/deps/rocksdb/rocksdb/db/db_test_util.h +10 -1
  25. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +0 -1
  26. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +175 -1
  27. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +64 -0
  28. package/deps/rocksdb/rocksdb/db/dbformat.h +5 -6
  29. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +8 -8
  30. package/deps/rocksdb/rocksdb/db/experimental.cc +3 -2
  31. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +2 -4
  32. package/deps/rocksdb/rocksdb/db/flush_job.cc +7 -2
  33. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +4 -2
  34. package/deps/rocksdb/rocksdb/db/listener_test.cc +5 -5
  35. package/deps/rocksdb/rocksdb/db/log_writer.cc +12 -3
  36. package/deps/rocksdb/rocksdb/db/memtable.cc +83 -23
  37. package/deps/rocksdb/rocksdb/db/memtable.h +11 -3
  38. package/deps/rocksdb/rocksdb/db/memtable_list.cc +7 -5
  39. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +21 -0
  40. package/deps/rocksdb/rocksdb/db/version_builder.cc +462 -33
  41. package/deps/rocksdb/rocksdb/db/version_builder.h +70 -23
  42. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +95 -207
  43. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +54 -35
  44. package/deps/rocksdb/rocksdb/db/version_set.cc +13 -11
  45. package/deps/rocksdb/rocksdb/db/version_set_test.cc +313 -59
  46. package/deps/rocksdb/rocksdb/db/write_batch.cc +124 -64
  47. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +2 -3
  48. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +1 -1
  49. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +4 -1
  50. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +9 -0
  51. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +4 -32
  52. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -3
  53. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +60 -172
  54. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +57 -2
  55. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +23 -15
  56. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +2 -3
  57. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.cc +1 -1
  58. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +4 -1
  59. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +200 -92
  60. package/deps/rocksdb/rocksdb/env/file_system.cc +3 -3
  61. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +124 -23
  62. package/deps/rocksdb/rocksdb/file/delete_scheduler.h +61 -8
  63. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +141 -2
  64. package/deps/rocksdb/rocksdb/file/file_util.cc +17 -2
  65. package/deps/rocksdb/rocksdb/file/file_util.h +10 -0
  66. package/deps/rocksdb/rocksdb/file/filename.cc +11 -3
  67. package/deps/rocksdb/rocksdb/file/filename.h +2 -1
  68. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +18 -0
  69. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +27 -4
  70. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +8 -1
  71. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +8 -13
  72. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +4 -0
  73. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +5 -0
  74. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -2
  75. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +2 -1
  76. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +34 -0
  77. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +25 -1
  78. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
  79. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +27 -9
  80. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +2 -0
  81. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +12 -0
  82. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
  83. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  84. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +29 -1
  85. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +102 -33
  86. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +46 -3
  87. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +4 -0
  88. package/deps/rocksdb/rocksdb/options/cf_options.cc +6 -0
  89. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  90. package/deps/rocksdb/rocksdb/options/db_options.cc +15 -1
  91. package/deps/rocksdb/rocksdb/options/db_options.h +2 -0
  92. package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -0
  93. package/deps/rocksdb/rocksdb/options/options_parser.cc +3 -2
  94. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -2
  95. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +75 -35
  96. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -0
  97. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +4 -0
  98. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +8 -1
  99. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +40 -15
  100. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +98 -17
  101. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +14 -2
  102. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +21 -91
  103. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +13 -21
  104. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +14 -5
  105. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +62 -53
  106. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +60 -38
  107. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +175 -78
  108. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +65 -36
  109. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +25 -15
  110. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +13 -1
  111. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +18 -4
  112. package/deps/rocksdb/rocksdb/table/meta_blocks.h +4 -0
  113. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +11 -0
  114. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +2 -2
  115. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +47 -18
  116. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +1 -2
  117. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +95 -0
  118. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +26 -15
  119. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +62 -19
  120. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +73 -34
  121. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +5 -0
  122. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +10 -3
  123. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +2 -1
  124. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +8 -5
  125. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +7 -4
  126. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +225 -0
  127. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +2 -1
  128. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +17 -0
  129. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +5 -2
  130. package/index.js +5 -17
  131. package/iterator.js +9 -1
  132. package/package.json +1 -1
  133. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  134. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
package/binding.cc CHANGED
@@ -37,7 +37,7 @@ class NullLogger : public rocksdb::Logger {
37
37
  };
38
38
 
39
39
  struct Database;
40
- struct Iterator;
40
+ class Iterator;
41
41
 
42
42
  struct ColumnFamily {
43
43
  rocksdb::ColumnFamilyHandle* handle;
@@ -285,10 +285,7 @@ struct BaseIterator : public Closable {
285
285
  const int limit,
286
286
  const bool fillCache,
287
287
  bool tailing = false)
288
- : database_(database),
289
- column_(column),
290
- reverse_(reverse),
291
- limit_(limit) {
288
+ : database_(database), column_(column), reverse_(reverse), limit_(limit) {
292
289
  if (lte) {
293
290
  upper_bound_ = rocksdb::PinnableSlice();
294
291
  *upper_bound_->GetSelf() = std::move(*lte) + '\0';
@@ -393,7 +390,6 @@ struct BaseIterator : public Closable {
393
390
  rocksdb::ColumnFamilyHandle* column_;
394
391
 
395
392
  private:
396
-
397
393
  int count_ = 0;
398
394
  std::optional<rocksdb::PinnableSlice> lower_bound_;
399
395
  std::optional<rocksdb::PinnableSlice> upper_bound_;
@@ -402,7 +398,15 @@ struct BaseIterator : public Closable {
402
398
  const int limit_;
403
399
  };
404
400
 
405
- struct Iterator final : public BaseIterator {
401
+ class Iterator final : public BaseIterator {
402
+ const bool keys_;
403
+ const bool values_;
404
+ const size_t highWaterMarkBytes_;
405
+ bool first_ = true;
406
+ const Encoding keyEncoding_;
407
+ const Encoding valueEncoding_;
408
+
409
+ public:
406
410
  Iterator(Database* database,
407
411
  rocksdb::ColumnFamilyHandle* column,
408
412
  const bool reverse,
@@ -430,12 +434,115 @@ struct Iterator final : public BaseIterator {
430
434
  return BaseIterator::Seek(target);
431
435
  }
432
436
 
433
- const bool keys_;
434
- const bool values_;
435
- const size_t highWaterMarkBytes_;
436
- bool first_ = true;
437
- const Encoding keyEncoding_;
438
- const Encoding valueEncoding_;
437
+ static std::unique_ptr<Iterator> create(napi_env env, napi_value db, napi_value options) {
438
+ Database* database;
439
+ NAPI_STATUS_THROWS(napi_get_value_external(env, db, reinterpret_cast<void**>(&database)));
440
+
441
+ bool reverse = false;
442
+ NAPI_STATUS_THROWS(GetProperty(env, options, "reverse", reverse));
443
+
444
+ bool keys = true;
445
+ NAPI_STATUS_THROWS(GetProperty(env, options, "keys", keys));
446
+
447
+ bool values = true;
448
+ NAPI_STATUS_THROWS(GetProperty(env, options, "values", values));
449
+
450
+ bool tailing = false;
451
+ NAPI_STATUS_THROWS(GetProperty(env, options, "tailing", tailing));
452
+
453
+ bool fillCache = false;
454
+ NAPI_STATUS_THROWS(GetProperty(env, options, "fillCache", fillCache));
455
+
456
+ int32_t limit = -1;
457
+ NAPI_STATUS_THROWS(GetProperty(env, options, "limit", limit));
458
+
459
+ int32_t highWaterMarkBytes = 64 * 1024;
460
+ NAPI_STATUS_THROWS(GetProperty(env, options, "highWaterMarkBytes", highWaterMarkBytes));
461
+
462
+ std::optional<std::string> lt;
463
+ NAPI_STATUS_THROWS(GetProperty(env, options, "lt", lt));
464
+
465
+ std::optional<std::string> lte;
466
+ NAPI_STATUS_THROWS(GetProperty(env, options, "lte", lte));
467
+
468
+ std::optional<std::string> gt;
469
+ NAPI_STATUS_THROWS(GetProperty(env, options, "gt", gt));
470
+
471
+ std::optional<std::string> gte;
472
+ NAPI_STATUS_THROWS(GetProperty(env, options, "gte", gte));
473
+
474
+ rocksdb::ColumnFamilyHandle* column = database->db->DefaultColumnFamily();
475
+ NAPI_STATUS_THROWS(GetProperty(env, options, "column", column));
476
+
477
+ Encoding keyEncoding;
478
+ NAPI_STATUS_THROWS(GetProperty(env, options, "keyEncoding", keyEncoding));
479
+
480
+ Encoding valueEncoding;
481
+ NAPI_STATUS_THROWS(GetProperty(env, options, "valueEncoding", valueEncoding));
482
+
483
+ return std::make_unique<Iterator>(database, column, reverse, keys, values, limit, lt, lte, gt, gte, fillCache,
484
+ highWaterMarkBytes, tailing, keyEncoding, valueEncoding);
485
+ }
486
+
487
+ napi_value nextv(napi_env env, uint32_t count) {
488
+ napi_value finished;
489
+ NAPI_STATUS_THROWS(napi_get_boolean(env, false, &finished));
490
+
491
+ napi_value rows;
492
+ NAPI_STATUS_THROWS(napi_create_array(env, &rows));
493
+
494
+ size_t idx = 0;
495
+ size_t bytesRead = 0;
496
+ while (true) {
497
+ if (!first_) {
498
+ Next();
499
+ } else {
500
+ first_ = false;
501
+ }
502
+
503
+ if (!Valid() || !Increment()) {
504
+ ROCKS_STATUS_THROWS_NAPI(Status());
505
+ NAPI_STATUS_THROWS(napi_get_boolean(env, true, &finished));
506
+ break;
507
+ }
508
+
509
+ napi_value key;
510
+ napi_value val;
511
+
512
+ if (keys_ && values_) {
513
+ const auto k = CurrentKey();
514
+ const auto v = CurrentValue();
515
+ NAPI_STATUS_THROWS(Convert(env, &k, keyEncoding_, key));
516
+ NAPI_STATUS_THROWS(Convert(env, &v, valueEncoding_, val));
517
+ bytesRead += k.size() + v.size();
518
+ } else if (keys_) {
519
+ const auto k = CurrentKey();
520
+ NAPI_STATUS_THROWS(Convert(env, &k, keyEncoding_, key));
521
+ NAPI_STATUS_THROWS(napi_get_undefined(env, &val));
522
+ bytesRead += k.size();
523
+ } else if (values_) {
524
+ const auto v = CurrentValue();
525
+ NAPI_STATUS_THROWS(napi_get_undefined(env, &key));
526
+ NAPI_STATUS_THROWS(Convert(env, &v, valueEncoding_, val));
527
+ bytesRead += v.size();
528
+ } else {
529
+ assert(false);
530
+ }
531
+
532
+ NAPI_STATUS_THROWS(napi_set_element(env, rows, idx++, key));
533
+ NAPI_STATUS_THROWS(napi_set_element(env, rows, idx++, val));
534
+
535
+ if (bytesRead > highWaterMarkBytes_ || idx / 2 >= count) {
536
+ break;
537
+ }
538
+ }
539
+
540
+ napi_value ret;
541
+ NAPI_STATUS_THROWS(napi_create_object(env, &ret));
542
+ NAPI_STATUS_THROWS(napi_set_named_property(env, ret, "rows", rows));
543
+ NAPI_STATUS_THROWS(napi_set_named_property(env, ret, "finished", finished));
544
+ return ret;
545
+ }
439
546
  };
440
547
 
441
548
  /**
@@ -525,6 +632,13 @@ NAPI_METHOD(db_get_location) {
525
632
  return result;
526
633
  }
527
634
 
635
+
636
+ NAPI_METHOD(db_query) {
637
+ NAPI_ARGV(2);
638
+
639
+ return Iterator::create(env, argv[0], argv[1])->nextv(env, std::numeric_limits<uint32_t>::max());
640
+ }
641
+
528
642
  template <typename T, typename U>
529
643
  napi_status InitOptions(napi_env env, T& columnOptions, const U& options) {
530
644
  rocksdb::ConfigOptions configOptions;
@@ -673,6 +787,7 @@ napi_status InitOptions(napi_env env, T& columnOptions, const U& options) {
673
787
 
674
788
  tableOptions.format_version = 5;
675
789
  tableOptions.checksum = rocksdb::kXXH3;
790
+ tableOptions.decouple_partitioned_filters = true;
676
791
 
677
792
  tableOptions.optimize_filters_for_memory = true;
678
793
  NAPI_STATUS_RETURN(GetProperty(env, options, "optimizeFiltersForMemory", tableOptions.optimize_filters_for_memory));
@@ -919,9 +1034,9 @@ NAPI_METHOD(db_get_many) {
919
1034
  for (auto n = 0; n < count; n++) {
920
1035
  napi_value row;
921
1036
  if (statuses[n].IsNotFound()) {
922
- NAPI_STATUS_THROWS(napi_get_null(env, &row));
923
- } else if (statuses[n].IsAborted()) {
924
1037
  NAPI_STATUS_THROWS(napi_get_undefined(env, &row));
1038
+ } else if (statuses[n].IsAborted()) {
1039
+ NAPI_STATUS_THROWS(napi_get_null(env, &row));
925
1040
  } else {
926
1041
  ROCKS_STATUS_THROWS_NAPI(statuses[n]);
927
1042
  NAPI_STATUS_THROWS(Convert(env, &values[n], valueEncoding, row));
@@ -1070,56 +1185,7 @@ NAPI_METHOD(db_get_latest_sequence) {
1070
1185
  NAPI_METHOD(iterator_init) {
1071
1186
  NAPI_ARGV(2);
1072
1187
 
1073
- Database* database;
1074
- NAPI_STATUS_THROWS(napi_get_value_external(env, argv[0], reinterpret_cast<void**>(&database)));
1075
-
1076
- const auto options = argv[1];
1077
-
1078
- bool reverse = false;
1079
- NAPI_STATUS_THROWS(GetProperty(env, options, "reverse", reverse));
1080
-
1081
- bool keys = true;
1082
- NAPI_STATUS_THROWS(GetProperty(env, options, "keys", keys));
1083
-
1084
- bool values = true;
1085
- NAPI_STATUS_THROWS(GetProperty(env, options, "values", values));
1086
-
1087
- bool tailing = false;
1088
- NAPI_STATUS_THROWS(GetProperty(env, options, "tailing", tailing));
1089
-
1090
- bool fillCache = false;
1091
- NAPI_STATUS_THROWS(GetProperty(env, options, "fillCache", fillCache));
1092
-
1093
- int32_t limit = -1;
1094
- NAPI_STATUS_THROWS(GetProperty(env, options, "limit", limit));
1095
-
1096
- int32_t highWaterMarkBytes = 64 * 1024;
1097
- NAPI_STATUS_THROWS(GetProperty(env, options, "highWaterMarkBytes", highWaterMarkBytes));
1098
-
1099
- std::optional<std::string> lt;
1100
- NAPI_STATUS_THROWS(GetProperty(env, options, "lt", lt));
1101
-
1102
- std::optional<std::string> lte;
1103
- NAPI_STATUS_THROWS(GetProperty(env, options, "lte", lte));
1104
-
1105
- std::optional<std::string> gt;
1106
- NAPI_STATUS_THROWS(GetProperty(env, options, "gt", gt));
1107
-
1108
- std::optional<std::string> gte;
1109
- NAPI_STATUS_THROWS(GetProperty(env, options, "gte", gte));
1110
-
1111
- rocksdb::ColumnFamilyHandle* column = database->db->DefaultColumnFamily();
1112
- NAPI_STATUS_THROWS(GetProperty(env, options, "column", column));
1113
-
1114
- Encoding keyEncoding;
1115
- NAPI_STATUS_THROWS(GetProperty(env, options, "keyEncoding", keyEncoding));
1116
-
1117
- Encoding valueEncoding;
1118
- NAPI_STATUS_THROWS(GetProperty(env, options, "valueEncoding", valueEncoding));
1119
-
1120
- auto iterator = std::unique_ptr<Iterator>(new Iterator(database, column, reverse, keys, values, limit, lt, lte, gt,
1121
- gte, fillCache, highWaterMarkBytes,
1122
- tailing, keyEncoding, valueEncoding));
1188
+ auto iterator = Iterator::create(env, argv[0], argv[1]);
1123
1189
 
1124
1190
  napi_value result;
1125
1191
  NAPI_STATUS_THROWS(napi_create_external(env, iterator.get(), Finalize<Iterator>, iterator.get(), &result));
@@ -1162,63 +1228,7 @@ NAPI_METHOD(iterator_nextv) {
1162
1228
  uint32_t count;
1163
1229
  NAPI_STATUS_THROWS(napi_get_value_uint32(env, argv[1], &count));
1164
1230
 
1165
- napi_value finished;
1166
- NAPI_STATUS_THROWS(napi_get_boolean(env, false, &finished));
1167
-
1168
- napi_value rows;
1169
- NAPI_STATUS_THROWS(napi_create_array(env, &rows));
1170
-
1171
- size_t idx = 0;
1172
- size_t bytesRead = 0;
1173
- while (true) {
1174
- if (!iterator->first_) {
1175
- iterator->Next();
1176
- } else {
1177
- iterator->first_ = false;
1178
- }
1179
-
1180
- if (!iterator->Valid() || !iterator->Increment()) {
1181
- ROCKS_STATUS_THROWS_NAPI(iterator->Status());
1182
- NAPI_STATUS_THROWS(napi_get_boolean(env, true, &finished));
1183
- break;
1184
- }
1185
-
1186
- napi_value key;
1187
- napi_value val;
1188
-
1189
- if (iterator->keys_ && iterator->values_) {
1190
- const auto k = iterator->CurrentKey();
1191
- const auto v = iterator->CurrentValue();
1192
- NAPI_STATUS_THROWS(Convert(env, &k, iterator->keyEncoding_, key));
1193
- NAPI_STATUS_THROWS(Convert(env, &v, iterator->valueEncoding_, val));
1194
- bytesRead += k.size() + v.size();
1195
- } else if (iterator->keys_) {
1196
- const auto k = iterator->CurrentKey();
1197
- NAPI_STATUS_THROWS(Convert(env, &k, iterator->keyEncoding_, key));
1198
- NAPI_STATUS_THROWS(napi_get_undefined(env, &val));
1199
- bytesRead += k.size();
1200
- } else if (iterator->values_) {
1201
- const auto v = iterator->CurrentValue();
1202
- NAPI_STATUS_THROWS(napi_get_undefined(env, &key));
1203
- NAPI_STATUS_THROWS(Convert(env, &v, iterator->valueEncoding_, val));
1204
- bytesRead += v.size();
1205
- } else {
1206
- assert(false);
1207
- }
1208
-
1209
- NAPI_STATUS_THROWS(napi_set_element(env, rows, idx++, key));
1210
- NAPI_STATUS_THROWS(napi_set_element(env, rows, idx++, val));
1211
-
1212
- if (bytesRead > iterator->highWaterMarkBytes_ || idx / 2 >= count) {
1213
- break;
1214
- }
1215
- }
1216
-
1217
- napi_value ret;
1218
- NAPI_STATUS_THROWS(napi_create_object(env, &ret));
1219
- NAPI_STATUS_THROWS(napi_set_named_property(env, ret, "rows", rows));
1220
- NAPI_STATUS_THROWS(napi_set_named_property(env, ret, "finished", finished));
1221
- return ret;
1231
+ return iterator->nextv(env, count);
1222
1232
  }
1223
1233
 
1224
1234
  NAPI_METHOD(batch_init) {
@@ -1403,6 +1413,7 @@ NAPI_INIT() {
1403
1413
  NAPI_EXPORT_FUNCTION(db_clear);
1404
1414
  NAPI_EXPORT_FUNCTION(db_get_property);
1405
1415
  NAPI_EXPORT_FUNCTION(db_get_latest_sequence);
1416
+ NAPI_EXPORT_FUNCTION(db_query);
1406
1417
 
1407
1418
  NAPI_EXPORT_FUNCTION(iterator_init);
1408
1419
  NAPI_EXPORT_FUNCTION(iterator_seek);
@@ -3067,12 +3067,20 @@ TEST_P(ColumnFamilyTest, CompactionSpeedupForMarkedFiles) {
3067
3067
  WaitForCompaction();
3068
3068
  AssertFilesPerLevel("0,1", 0 /* cf */);
3069
3069
 
3070
+ // We should calculate the limit by obtaining the number of env background
3071
+ // threads, because the current test case will share the same env
3072
+ // with another case that may have already increased the number of
3073
+ // background threads which is larger than kParallelismLimit
3074
+ const auto limit = env_->GetBackgroundThreads(Env::Priority::LOW);
3075
+
3070
3076
  // Block the compaction thread pool so marked files accumulate in L0.
3071
- test::SleepingBackgroundTask sleeping_tasks[kParallelismLimit];
3072
- for (int i = 0; i < kParallelismLimit; i++) {
3077
+ std::vector<std::shared_ptr<test::SleepingBackgroundTask>> sleeping_tasks;
3078
+ for (int i = 0; i < limit; i++) {
3079
+ sleeping_tasks.emplace_back(
3080
+ std::make_shared<test::SleepingBackgroundTask>());
3073
3081
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
3074
- &sleeping_tasks[i], Env::Priority::LOW);
3075
- sleeping_tasks[i].WaitUntilSleeping();
3082
+ sleeping_tasks[i].get(), Env::Priority::LOW);
3083
+ sleeping_tasks[i]->WaitUntilSleeping();
3076
3084
  }
3077
3085
 
3078
3086
  // Zero marked upper-level files. No speedup.
@@ -3091,9 +3099,9 @@ TEST_P(ColumnFamilyTest, CompactionSpeedupForMarkedFiles) {
3091
3099
  ASSERT_EQ(kParallelismLimit, dbfull()->TEST_BGCompactionsAllowed());
3092
3100
  AssertFilesPerLevel("2,1", 0 /* cf */);
3093
3101
 
3094
- for (int i = 0; i < kParallelismLimit; i++) {
3095
- sleeping_tasks[i].WakeUp();
3096
- sleeping_tasks[i].WaitUntilDone();
3102
+ for (int i = 0; i < limit; i++) {
3103
+ sleeping_tasks[i]->WakeUp();
3104
+ sleeping_tasks[i]->WaitUntilDone();
3097
3105
  }
3098
3106
  }
3099
3107
 
@@ -552,7 +552,8 @@ class CompactionJobTestBase : public testing::Test {
552
552
  /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"",
553
553
  /*error_handler=*/nullptr, /*read_only=*/false));
554
554
  compaction_job_stats_.Reset();
555
- ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_));
555
+ ASSERT_OK(
556
+ SetIdentityFile(WriteOptions(), env_, dbname_, Temperature::kUnknown));
556
557
 
557
558
  VersionEdit new_db;
558
559
  new_db.SetLogNumber(0);
@@ -575,7 +576,8 @@ class CompactionJobTestBase : public testing::Test {
575
576
  }
576
577
  ASSERT_OK(s);
577
578
  // Make "CURRENT" file that points to the new manifest file.
578
- s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
579
+ s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
580
+ Temperature::kUnknown, nullptr);
579
581
 
580
582
  ASSERT_OK(s);
581
583
 
@@ -925,11 +925,15 @@ bool LevelCompactionBuilder::PickSizeBasedIntraL0Compaction() {
925
925
  }
926
926
  uint64_t l0_size = 0;
927
927
  for (const auto& file : l0_files) {
928
- l0_size += file->fd.GetFileSize();
928
+ assert(file->compensated_file_size >= file->fd.GetFileSize());
929
+ // Compact down L0s with more deletions.
930
+ l0_size += file->compensated_file_size;
929
931
  }
930
- const uint64_t min_lbase_size =
931
- l0_size * static_cast<uint64_t>(std::max(
932
- 10.0, mutable_cf_options_.max_bytes_for_level_multiplier));
932
+
933
+ // Avoid L0->Lbase compactions that are inefficient for write-amp.
934
+ const double kMultiplier =
935
+ std::max(10.0, mutable_cf_options_.max_bytes_for_level_multiplier) * 2;
936
+ const uint64_t min_lbase_size = MultiplyCheckOverflow(l0_size, kMultiplier);
933
937
  assert(min_lbase_size >= l0_size);
934
938
  const std::vector<FileMetaData*>& lbase_files =
935
939
  vstorage_->LevelFiles(/*level=*/base_level);
@@ -214,7 +214,10 @@ class CompactionPickerTest : public CompactionPickerTestBase {
214
214
  explicit CompactionPickerTest()
215
215
  : CompactionPickerTestBase(BytewiseComparator()) {}
216
216
 
217
- ~CompactionPickerTest() override = default;
217
+ ~CompactionPickerTest() override {
218
+ SyncPoint::GetInstance()->ClearAllCallBacks();
219
+ SyncPoint::GetInstance()->DisableProcessing();
220
+ }
218
221
  };
219
222
 
220
223
  class CompactionPickerU64TsTest : public CompactionPickerTestBase {
@@ -4284,27 +4287,28 @@ TEST_F(CompactionPickerTest, IntraL0WhenL0IsSmall) {
4284
4287
  SCOPED_TRACE("lbase_size_multiplier=" +
4285
4288
  std::to_string(lbase_size_multiplier));
4286
4289
  NewVersionStorage(6, kCompactionStyleLevel);
4287
- // When L0 size is <= Lbase size / max_bytes_for_level_multiplier,
4290
+ // When L0 size is <= Lbase size / max_bytes_for_level_multiplier / 2,
4288
4291
  // intra-L0 compaction is picked. Otherwise, L0->L1
4289
4292
  // compaction is picked.
4293
+ // compensated_file_size will be used to compute total l0 size.
4290
4294
  Add(/*level=*/0, /*file_number=*/1U, /*smallest=*/"100",
4291
- /*largest=*/"200", /*file_size=*/1000, /*path_id=*/0,
4295
+ /*largest=*/"200", /*file_size=*/10, /*path_id=*/0,
4292
4296
  /*smallest_seq=*/10, /*largest_seq=*/11,
4293
4297
  /*compensated_file_size=*/1000);
4294
4298
  Add(/*level=*/0, /*file_number=*/2U, /*smallest=*/"100",
4295
- /*largest=*/"100", /*file_size=*/1000, /*path_id=*/0,
4299
+ /*largest=*/"100", /*file_size=*/10, /*path_id=*/0,
4296
4300
  /*smallest_seq=*/20, /*largest_seq=*/21,
4297
4301
  /*compensated_file_size=*/1000);
4298
4302
  Add(/*level=*/0, /*file_number=*/3U, /*smallest=*/"100",
4299
- /*largest=*/"200", /*file_size=*/1000, /*path_id=*/0,
4303
+ /*largest=*/"200", /*file_size=*/10, /*path_id=*/0,
4300
4304
  /*smallest_seq=*/30, /*largest_seq=*/31,
4301
4305
  /*compensated_file_size=*/1000);
4302
4306
  Add(/*level=*/0, /*file_number=*/4U, /*smallest=*/"100",
4303
- /*largest=*/"200", /*file_size=*/1000, /*path_id=*/0,
4307
+ /*largest=*/"200", /*file_size=*/10, /*path_id=*/0,
4304
4308
  /*smallest_seq=*/40, /*largest_seq=*/41,
4305
4309
  /*compensated_file_size=*/1000);
4306
4310
  const uint64_t l0_size = 4000;
4307
- const uint64_t lbase_size = l0_size * lbase_size_multiplier;
4311
+ const uint64_t lbase_size = l0_size * lbase_size_multiplier * 2;
4308
4312
  Add(/*level=*/1, /*file_number=*/5U, /*smallest=*/"100",
4309
4313
  /*largest=*/"200", /*file_size=*/lbase_size, /*path_id=*/0,
4310
4314
  /*smallest_seq=*/0, /*largest_seq=*/0,
@@ -140,9 +140,13 @@ CompactionJob::ProcessKeyValueCompactionWithCompactionService(
140
140
  return compaction_status;
141
141
  }
142
142
 
143
+ // CompactionServiceJobStatus::kSuccess was returned, but somehow we failed to
144
+ // read the result. Consider this as an installation failure
143
145
  if (!s.ok()) {
144
146
  sub_compact->status = s;
145
147
  compaction_result.status.PermitUncheckedError();
148
+ db_options_.compaction_service->OnInstallation(
149
+ response.scheduled_job_id, CompactionServiceJobStatus::kFailure);
146
150
  return CompactionServiceJobStatus::kFailure;
147
151
  }
148
152
  sub_compact->status = compaction_result.status;
@@ -154,18 +158,14 @@ CompactionJob::ProcessKeyValueCompactionWithCompactionService(
154
158
  is_first_one = false;
155
159
  }
156
160
 
157
- ROCKS_LOG_INFO(db_options_.info_log,
158
- "[%s] [JOB %d] Receive remote compaction result, output path: "
159
- "%s, files: %s",
160
- compaction_input.column_family.name.c_str(), job_id_,
161
- compaction_result.output_path.c_str(),
162
- output_files_oss.str().c_str());
163
-
164
- if (!s.ok()) {
165
- sub_compact->status = s;
166
- return CompactionServiceJobStatus::kFailure;
167
- }
161
+ ROCKS_LOG_INFO(
162
+ db_options_.info_log,
163
+ "[%s] [JOB %d] Received remote compaction result, output path: "
164
+ "%s, files: %s",
165
+ compaction_input.column_family.name.c_str(), job_id_,
166
+ compaction_result.output_path.c_str(), output_files_oss.str().c_str());
168
167
 
168
+ // Installation Starts
169
169
  for (const auto& file : compaction_result.output_files) {
170
170
  uint64_t file_num = versions_->NewFileNumber();
171
171
  auto src_file = compaction_result.output_path + "/" + file.file_name;
@@ -174,6 +174,8 @@ CompactionJob::ProcessKeyValueCompactionWithCompactionService(
174
174
  s = fs_->RenameFile(src_file, tgt_file, IOOptions(), nullptr);
175
175
  if (!s.ok()) {
176
176
  sub_compact->status = s;
177
+ db_options_.compaction_service->OnInstallation(
178
+ response.scheduled_job_id, CompactionServiceJobStatus::kFailure);
177
179
  return CompactionServiceJobStatus::kFailure;
178
180
  }
179
181
 
@@ -182,6 +184,8 @@ CompactionJob::ProcessKeyValueCompactionWithCompactionService(
182
184
  s = fs_->GetFileSize(tgt_file, IOOptions(), &file_size, nullptr);
183
185
  if (!s.ok()) {
184
186
  sub_compact->status = s;
187
+ db_options_.compaction_service->OnInstallation(
188
+ response.scheduled_job_id, CompactionServiceJobStatus::kFailure);
185
189
  return CompactionServiceJobStatus::kFailure;
186
190
  }
187
191
  meta.fd = FileDescriptor(file_num, compaction->output_path_id(), file_size,
@@ -206,6 +210,8 @@ CompactionJob::ProcessKeyValueCompactionWithCompactionService(
206
210
  RecordTick(stats_, REMOTE_COMPACT_READ_BYTES, compaction_result.bytes_read);
207
211
  RecordTick(stats_, REMOTE_COMPACT_WRITE_BYTES,
208
212
  compaction_result.bytes_written);
213
+ db_options_.compaction_service->OnInstallation(
214
+ response.scheduled_job_id, CompactionServiceJobStatus::kSuccess);
209
215
  return CompactionServiceJobStatus::kSuccess;
210
216
  }
211
217
 
@@ -108,6 +108,11 @@ class MyTestCompactionService : public CompactionService {
108
108
  }
109
109
  }
110
110
 
111
+ void OnInstallation(const std::string& /*scheduled_job_id*/,
112
+ CompactionServiceJobStatus status) override {
113
+ final_updated_status_ = status;
114
+ }
115
+
111
116
  int GetCompactionNum() { return compaction_num_.load(); }
112
117
 
113
118
  CompactionServiceJobInfo GetCompactionInfoForStart() { return start_info_; }
@@ -136,6 +141,10 @@ class MyTestCompactionService : public CompactionService {
136
141
 
137
142
  void SetCanceled(bool canceled) { canceled_ = canceled; }
138
143
 
144
+ CompactionServiceJobStatus GetFinalCompactionServiceJobStatus() {
145
+ return final_updated_status_.load();
146
+ }
147
+
139
148
  private:
140
149
  InstrumentedMutex mutex_;
141
150
  std::atomic_int compaction_num_{0};
@@ -158,6 +167,8 @@ class MyTestCompactionService : public CompactionService {
158
167
  std::vector<std::shared_ptr<TablePropertiesCollectorFactory>>
159
168
  table_properties_collector_factories_;
160
169
  std::atomic_bool canceled_{false};
170
+ std::atomic<CompactionServiceJobStatus> final_updated_status_{
171
+ CompactionServiceJobStatus::kUseLocal};
161
172
  };
162
173
 
163
174
  class CompactionServiceTest : public DBTestBase {
@@ -255,6 +266,8 @@ TEST_F(CompactionServiceTest, BasicCompactions) {
255
266
 
256
267
  auto my_cs = GetCompactionService();
257
268
  ASSERT_GE(my_cs->GetCompactionNum(), 1);
269
+ ASSERT_EQ(CompactionServiceJobStatus::kSuccess,
270
+ my_cs->GetFinalCompactionServiceJobStatus());
258
271
 
259
272
  // make sure the compaction statistics is only recorded on the remote side
260
273
  ASSERT_GE(compactor_statistics->getTickerCount(COMPACT_WRITE_BYTES), 1);
@@ -437,6 +450,8 @@ TEST_F(CompactionServiceTest, InvalidResult) {
437
450
  Slice end(end_str);
438
451
  Status s = db_->CompactRange(CompactRangeOptions(), &start, &end);
439
452
  ASSERT_FALSE(s.ok());
453
+ ASSERT_EQ(CompactionServiceJobStatus::kFailure,
454
+ my_cs->GetFinalCompactionServiceJobStatus());
440
455
  }
441
456
 
442
457
  TEST_F(CompactionServiceTest, SubCompaction) {