pyrex-rocksdb 0.1.0-cp312-cp312-macosx_14_0_arm64.whl → 0.1.4-cp312-cp312-macosx_14_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyrex-rocksdb might be problematic. Click here for more details.

pyrex/_pyrex.cpp CHANGED
@@ -1,9 +1,14 @@
1
- // rocksdb_wrapper.cpp
2
1
  #include <pybind11/pybind11.h>
3
2
  #include <pybind11/stl.h>
4
3
 
5
- // NEW: Include for std::unique_ptr
6
4
  #include <memory>
5
+ #include <vector>
6
+ #include <map>
7
+ #include <set>
8
+ #include <string>
9
+ #include <mutex>
10
+ #include <atomic>
11
+ #include <iostream>
7
12
 
8
13
  #include "rocksdb/db.h"
9
14
  #include "rocksdb/options.h"
@@ -14,289 +19,734 @@
14
19
  #include "rocksdb/write_batch.h"
15
20
  #include "rocksdb/iterator.h"
16
21
 
17
- #include <iostream>
18
- #include <string>
19
- #include <cstring>
20
-
21
22
  namespace py = pybind11;
22
23
 
23
- // Define a custom exception for RocksDB errors
24
+ // --- Custom Exception ---
24
25
// --- Custom Exception ---
// Raised for any RocksDB-level failure (open, read, write, column-family and
// iterator errors). Mapped to a Python exception in the module definition.
class RocksDBException : public std::runtime_error {
public:
    explicit RocksDBException(const std::string& message)
        : std::runtime_error(message) {}
};
28
29
 
29
- // --- PyOptions class to wrap rocksdb::Options ---
30
+ // --- Forward Declarations ---
31
+ class PyRocksDB;
32
+
33
+ // --- PyReadOptions Wrapper ---
34
+ class PyReadOptions {
35
+ public:
36
+ rocksdb::ReadOptions options_;
37
+
38
+ PyReadOptions() = default;
39
+
40
+ // Expose properties to Python
41
+ bool get_fill_cache() const { return options_.fill_cache; }
42
+ void set_fill_cache(bool value) { options_.fill_cache = value; }
43
+
44
+ bool get_verify_checksums() const { return options_.verify_checksums; }
45
+ void set_verify_checksums(bool value) { options_.verify_checksums = value; }
46
+ };
47
+
48
+ // --- PyWriteOptions Wrapper ---
49
+ class PyWriteOptions {
50
+ public:
51
+ rocksdb::WriteOptions options_;
52
+
53
+ PyWriteOptions() = default;
54
+
55
+ // Expose properties to Python
56
+ bool get_sync() const { return options_.sync; }
57
+ void set_sync(bool value) { options_.sync = value; }
58
+
59
+ bool get_disable_wal() const { return options_.disableWAL; }
60
+ void set_disable_wal(bool value) { options_.disableWAL = value; }
61
+ };
62
+
63
+
64
+ // --- PyOptions Wrapper ---
30
65
  class PyOptions {
31
66
  public:
32
67
  rocksdb::Options options_;
68
+ rocksdb::ColumnFamilyOptions cf_options_;
33
69
 
34
- PyOptions() : options_() {}
35
-
70
+ PyOptions() {
71
+ options_.compression = rocksdb::kSnappyCompression;
72
+ cf_options_.compression = rocksdb::kSnappyCompression;
73
+ }
36
74
  bool get_create_if_missing() const { return options_.create_if_missing; }
37
75
  void set_create_if_missing(bool value) { options_.create_if_missing = value; }
38
-
39
76
  bool get_error_if_exists() const { return options_.error_if_exists; }
40
77
  void set_error_if_exists(bool value) { options_.error_if_exists = value; }
41
-
42
78
  int get_max_open_files() const { return options_.max_open_files; }
43
79
  void set_max_open_files(int value) { options_.max_open_files = value; }
44
-
45
80
  size_t get_write_buffer_size() const { return options_.write_buffer_size; }
46
81
  void set_write_buffer_size(size_t value) { options_.write_buffer_size = value; }
47
-
48
82
  rocksdb::CompressionType get_compression() const { return options_.compression; }
49
83
  void set_compression(rocksdb::CompressionType value) { options_.compression = value; }
50
-
51
84
  int get_max_background_jobs() const { return options_.max_background_jobs; }
52
85
  void set_max_background_jobs(int value) { options_.max_background_jobs = value; }
53
-
54
- void increase_parallelism(int total_threads) {
55
- options_.IncreaseParallelism(total_threads);
56
- }
57
-
58
- void optimize_for_small_db() {
59
- options_.OptimizeForSmallDb();
60
- }
61
-
86
+ void increase_parallelism(int total_threads) { options_.IncreaseParallelism(total_threads); }
87
+ void optimize_for_small_db() { options_.OptimizeForSmallDb(); }
62
88
  void use_block_based_bloom_filter(double bits_per_key = 10.0) {
63
- if (options_.table_factory == nullptr ||
64
- std::strcmp(options_.table_factory->Name(), "BlockBasedTable") != 0) {
65
- options_.table_factory.reset(rocksdb::NewBlockBasedTableFactory());
66
- }
67
-
68
89
  rocksdb::BlockBasedTableOptions table_options;
69
- // NOTE: This part assumes we are creating a new policy, not modifying an existing one.
70
- // This is a reasonable simplification for a wrapper.
71
-
72
- // Create a new bloom filter policy
73
90
  table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(bits_per_key));
74
91
  options_.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options));
92
+ cf_options_.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options));
75
93
  }
94
+ size_t get_cf_write_buffer_size() const { return cf_options_.write_buffer_size; }
95
+ void set_cf_write_buffer_size(size_t value) { cf_options_.write_buffer_size = value; }
96
+ rocksdb::CompressionType get_cf_compression() const { return cf_options_.compression; }
97
+ void set_cf_compression(rocksdb::CompressionType value) { cf_options_.compression = value; }
76
98
  };
77
99
 
78
- // --- PyWriteBatch class to wrap rocksdb::WriteBatch ---
79
- class PyWriteBatch {
100
+ // --- PyColumnFamilyHandle Wrapper ---
101
+ class PyColumnFamilyHandle {
80
102
  public:
81
- rocksdb::WriteBatch wb_;
103
+ rocksdb::ColumnFamilyHandle* cf_handle_;
104
+ std::string name_;
82
105
 
83
- PyWriteBatch() : wb_() {}
84
-
85
- void put(const py::bytes& key_bytes, const py::bytes& value_bytes) {
86
- wb_.Put(static_cast<std::string>(key_bytes), static_cast<std::string>(value_bytes));
106
+ PyColumnFamilyHandle(rocksdb::ColumnFamilyHandle* handle, const std::string& name)
107
+ : cf_handle_(handle), name_(name) {
108
+ if (!cf_handle_) {
109
+ throw RocksDBException("Invalid ColumnFamilyHandle received.");
110
+ }
87
111
  }
112
+ const std::string& get_name() const { return name_; }
113
+ bool is_valid() const { return cf_handle_ != nullptr; }
114
+ };
88
115
 
89
- void del(const py::bytes& key_bytes) {
90
- wb_.Delete(static_cast<std::string>(key_bytes));
116
+ // --- PyWriteBatch Wrapper ---
117
+ class PyWriteBatch {
118
+ public:
119
+ rocksdb::WriteBatch wb_;
120
+ PyWriteBatch() = default;
121
+ void put(const py::bytes& key, const py::bytes& value) { wb_.Put(static_cast<std::string>(key), static_cast<std::string>(value)); }
122
+ void put_cf(PyColumnFamilyHandle& cf, const py::bytes& key, const py::bytes& value) {
123
+ if (!cf.is_valid()) throw RocksDBException("ColumnFamilyHandle is invalid.");
124
+ rocksdb::Slice key_slice(static_cast<std::string_view>(key));
125
+ rocksdb::Slice value_slice(static_cast<std::string_view>(value));
126
+ wb_.Put(cf.cf_handle_, key_slice, value_slice);
91
127
  }
92
-
93
- void clear() {
94
- wb_.Clear();
128
+ void del(const py::bytes& key) {
129
+ rocksdb::Slice key_slice(static_cast<std::string_view>(key));
130
+ wb_.Delete(key_slice);
95
131
  }
132
+ void del_cf(PyColumnFamilyHandle& cf, const py::bytes& key) {
133
+ if (!cf.is_valid()) throw RocksDBException("ColumnFamilyHandle is invalid.");
134
+ rocksdb::Slice key_slice(static_cast<std::string_view>(key));
135
+ wb_.Delete(cf.cf_handle_, key_slice);
136
+ }
137
+ void merge(const py::bytes& key, const py::bytes& value) {
138
+ rocksdb::Slice key_slice(static_cast<std::string_view>(key));
139
+ rocksdb::Slice value_slice(static_cast<std::string_view>(value));
140
+ wb_.Merge(key_slice, value_slice);
141
+ }
142
+ void merge_cf(PyColumnFamilyHandle& cf, const py::bytes& key, const py::bytes& value) {
143
+ if (!cf.is_valid()) throw RocksDBException("ColumnFamilyHandle is invalid.");
144
+ rocksdb::Slice key_slice(static_cast<std::string_view>(key));
145
+ rocksdb::Slice value_slice(static_cast<std::string_view>(value));
146
+ wb_.Merge(cf.cf_handle_, key_slice, value_slice);
147
+ }
148
+ void clear() { wb_.Clear(); }
96
149
  };
97
150
 
98
- // --- PyRocksDBIterator class to wrap rocksdb::Iterator ---
151
+ // --- PyRocksDBIterator Class Declaration ---
99
152
  class PyRocksDBIterator {
153
+ private:
154
+ rocksdb::Iterator* it_raw_ptr_;
155
+ std::shared_ptr<PyRocksDB> parent_db_ptr_;
156
+ void check_parent_db_is_open() const;
157
+
100
158
  public:
101
- // Raw pointer to the RocksDB Iterator.
102
- // The lifetime of this pointer is managed by this class's constructor/destructor.
103
- rocksdb::Iterator* it_;
159
+ explicit PyRocksDBIterator(rocksdb::Iterator* it, std::shared_ptr<PyRocksDB> parent_db);
160
+ ~PyRocksDBIterator();
161
+ bool valid();
162
+ void seek_to_first();
163
+ void seek_to_last();
164
+ void seek(const py::bytes& key);
165
+ void next();
166
+ void prev();
167
+ py::object key();
168
+ py::object value();
169
+ void check_status();
170
+ };
171
+
172
+ // --- PyRocksDB Class (Base) ---
173
+ class PyRocksDB : public std::enable_shared_from_this<PyRocksDB> {
174
+ protected:
175
+ rocksdb::DB* db_ = nullptr;
176
+ rocksdb::ColumnFamilyHandle* default_cf_handle_ = nullptr;
177
+ PyOptions opened_options_;
178
+ std::string path_;
179
+ std::map<std::string, std::shared_ptr<PyColumnFamilyHandle>> cf_handles_;
180
+ std::atomic<bool> is_closed_{false};
181
+ std::atomic<bool> is_read_only_{false};
182
+ std::mutex active_iterators_mutex_;
183
+ std::set<rocksdb::Iterator*> active_rocksdb_iterators_;
184
+
185
+ // Default options for read/write operations
186
+ std::shared_ptr<PyReadOptions> default_read_options_;
187
+ std::shared_ptr<PyWriteOptions> default_write_options_;
188
+
189
+
190
+ friend class PyRocksDBIterator;
191
+
192
+ void check_db_open() const {
193
+ if (is_closed_ || db_ == nullptr) {
194
+ throw RocksDBException("Database is not open or has been closed.");
195
+ }
196
+ }
104
197
 
105
- explicit PyRocksDBIterator(rocksdb::Iterator* it) : it_(it) {
106
- if (!it_) {
107
- throw RocksDBException("Failed to create RocksDB iterator: null pointer received.");
198
+ void check_read_only() const {
199
+ if (is_read_only_.load()) {
200
+ throw RocksDBException("Cannot perform put/write/delete operation: Database opened in read-only mode.");
108
201
  }
109
- // std::cout << "DEBUG: Creating RocksDB iterator." << std::endl; // Optional: for debugging
110
202
  }
111
203
 
112
- // Destructor: Ensures the C++ iterator is deleted when this object is destroyed.
113
- ~PyRocksDBIterator() {
114
- if (it_ != nullptr) {
115
- std::cout << "DEBUG: Deleting RocksDB iterator." << std::endl;
116
- delete it_;
117
- it_ = nullptr;
204
+ rocksdb::ColumnFamilyHandle* get_default_cf_handle() const {
205
+ auto it = cf_handles_.find(rocksdb::kDefaultColumnFamilyName);
206
+ if (it == cf_handles_.end() || !it->second->is_valid()) {
207
+ throw RocksDBException("Default column family handle is not available.");
118
208
  }
209
+ return it->second->cf_handle_;
119
210
  }
120
211
 
121
- bool valid() const {
122
- return it_->Valid();
212
+ public:
213
+ // Default constructor for inheritance
214
+ PyRocksDB()
215
+ : default_read_options_(std::make_shared<PyReadOptions>()),
216
+ default_write_options_(std::make_shared<PyWriteOptions>())
217
+ {}
218
+
219
+ // Public constructor for the simple interface
220
+ PyRocksDB(const std::string& path, PyOptions* py_options, bool read_only = false)
221
+ : default_read_options_(std::make_shared<PyReadOptions>()),
222
+ default_write_options_(std::make_shared<PyWriteOptions>())
223
+ {
224
+ this->path_ = path;
225
+ this->is_read_only_.store(read_only);
226
+ rocksdb::Options options;
227
+ if (py_options) {
228
+ options = py_options->options_;
229
+ this->opened_options_ = *py_options;
230
+ } else {
231
+ options.create_if_missing = true;
232
+ this->opened_options_.options_ = options;
233
+ }
234
+
235
+ rocksdb::Status s;
236
+ if (read_only) {
237
+ s = rocksdb::DB::OpenForReadOnly(options, path, &this->db_);
238
+ } else {
239
+ s = rocksdb::DB::Open(options, path, &this->db_);
240
+ }
241
+ if (!s.ok()) {
242
+ throw RocksDBException("Failed to open RocksDB at " + path + ": " + s.ToString());
243
+ }
244
+ this->default_cf_handle_ = this->db_->DefaultColumnFamily();
245
+ this->cf_handles_[rocksdb::kDefaultColumnFamilyName] = std::make_shared<PyColumnFamilyHandle>(this->default_cf_handle_, rocksdb::kDefaultColumnFamilyName);
123
246
  }
124
247
 
125
- void seek_to_first() {
126
- it_->SeekToFirst();
248
+ virtual ~PyRocksDB() {
249
+ close();
127
250
  }
128
251
 
129
- void seek_to_last() {
130
- it_->SeekToLast();
252
+ void close() {
253
+ if (!is_closed_.exchange(true)) {
254
+ {
255
+ std::lock_guard<std::mutex> lock(active_iterators_mutex_);
256
+ for (rocksdb::Iterator* iter_raw_ptr : active_rocksdb_iterators_) {
257
+ delete iter_raw_ptr;
258
+ }
259
+ active_rocksdb_iterators_.clear();
260
+ }
261
+ for (auto const& [name, handle_ptr] : cf_handles_) {
262
+ handle_ptr->cf_handle_ = nullptr;
263
+ }
264
+ cf_handles_.clear();
265
+ if (db_) {
266
+ delete db_;
267
+ db_ = nullptr;
268
+ }
269
+ }
131
270
  }
132
271
 
133
- void seek(const py::bytes& key_bytes) {
134
- it_->Seek(static_cast<std::string>(key_bytes));
272
+ void put(const py::bytes& key, const py::bytes& value, std::shared_ptr<PyWriteOptions> write_options = nullptr) {
273
+ check_db_open();
274
+ check_read_only();
275
+
276
+ rocksdb::Slice key_slice(static_cast<std::string_view>(key));
277
+ rocksdb::Slice value_slice(static_cast<std::string_view>(value));
278
+
279
+ const auto& opts = write_options ? write_options->options_ : default_write_options_->options_;
280
+ rocksdb::Status s = db_->Put(opts, default_cf_handle_, key_slice, value_slice);
281
+ if (!s.ok()) throw RocksDBException("Put failed: " + s.ToString());
282
+ }
283
+
284
+ py::object get(const py::bytes& key, std::shared_ptr<PyReadOptions> read_options = nullptr) {
285
+ check_db_open();
286
+ std::string value_str;
287
+ rocksdb::Status s;
288
+
289
+ const auto& opts = read_options ? read_options->options_ : default_read_options_->options_;
290
+
291
+ {
292
+ py::gil_scoped_release release;
293
+ rocksdb::Slice key_slice(static_cast<std::string_view>(key));
294
+ s = db_->Get(opts, default_cf_handle_, key_slice, &value_str);
295
+ }
296
+ if (s.ok()) return py::bytes(value_str);
297
+ if (s.IsNotFound()) return py::none();
298
+ throw RocksDBException("Get failed: " + s.ToString());
135
299
  }
136
300
 
137
- void next() {
138
- it_->Next();
301
+ void del(const py::bytes& key, std::shared_ptr<PyWriteOptions> write_options = nullptr) {
302
+ check_db_open();
303
+ check_read_only();
304
+ rocksdb::Slice key_slice(static_cast<std::string_view>(key));
305
+ const auto& opts = write_options ? write_options->options_ : default_write_options_->options_;
306
+ rocksdb::Status s = db_->Delete(opts, default_cf_handle_, key_slice);
307
+ if (!s.ok()) throw RocksDBException("Delete failed: " + s.ToString());
139
308
  }
140
309
 
141
- void prev() {
142
- it_->Prev();
310
+ void write(PyWriteBatch& batch, std::shared_ptr<PyWriteOptions> write_options = nullptr) {
311
+ check_db_open();
312
+ check_read_only();
313
+ const auto& opts = write_options ? write_options->options_ : default_write_options_->options_;
314
+ rocksdb::Status s = db_->Write(opts, &batch.wb_);
315
+ if (!s.ok()) throw RocksDBException("Write failed: " + s.ToString());
143
316
  }
144
317
 
145
- py::object key() {
146
- if (it_->Valid()) {
147
- return py::bytes(it_->key().ToString());
318
+ std::shared_ptr<PyRocksDBIterator> new_iterator(std::shared_ptr<PyReadOptions> read_options = nullptr) {
319
+ check_db_open();
320
+ const auto& opts = read_options ? read_options->options_ : default_read_options_->options_;
321
+ rocksdb::Iterator* raw_iter = db_->NewIterator(opts, default_cf_handle_);
322
+ {
323
+ std::lock_guard<std::mutex> lock(active_iterators_mutex_);
324
+ active_rocksdb_iterators_.insert(raw_iter);
148
325
  }
149
- return py::none();
326
+ return std::make_shared<PyRocksDBIterator>(raw_iter, shared_from_this());
150
327
  }
151
328
 
152
- py::object value() {
153
- if (it_->Valid()) {
154
- return py::bytes(it_->value().ToString());
155
- }
156
- return py::none();
329
+ PyOptions get_options() const { return opened_options_; }
330
+
331
+ // Getters and setters for default options
332
+ std::shared_ptr<PyReadOptions> get_default_read_options() { return default_read_options_; }
333
+ void set_default_read_options(std::shared_ptr<PyReadOptions> opts) {
334
+ if (!opts) throw RocksDBException("ReadOptions cannot be None.");
335
+ default_read_options_ = opts;
157
336
  }
158
337
 
159
- void check_status() {
160
- rocksdb::Status status = it_->status();
161
- if (!status.ok()) {
162
- throw RocksDBException("RocksDB Iterator error: " + status.ToString());
163
- }
338
+ std::shared_ptr<PyWriteOptions> get_default_write_options() { return default_write_options_; }
339
+ void set_default_write_options(std::shared_ptr<PyWriteOptions> opts) {
340
+ if (!opts) throw RocksDBException("WriteOptions cannot be None.");
341
+ default_write_options_ = opts;
164
342
  }
165
343
  };
166
344
 
167
- // --- PyRocksDB class to wrap rocksdb::DB ---
168
- class PyRocksDB {
345
+ // --- PyRocksDBExtended Class (Derived) ---
346
+ class PyRocksDBExtended : public PyRocksDB {
169
347
  public:
170
- rocksdb::DB* db_;
171
- PyOptions opened_options_; // Store the options used to open the DB
172
- std::string path_; // IMPROVEMENT: Store the path for accurate debug messages
173
-
174
- PyRocksDB(const std::string& path, PyOptions* py_options = nullptr) : db_(nullptr), path_(path) {
175
- rocksdb::Options actual_options;
348
+ // Constructor for the extended interface with CF support
349
+ PyRocksDBExtended(const std::string& path, PyOptions* py_options, bool read_only = false) {
350
+ this->path_ = path;
351
+ this->is_read_only_ = read_only;
352
+ rocksdb::Options options;
353
+ if (py_options) {
354
+ options = py_options->options_;
355
+ this->opened_options_ = *py_options;
356
+ } else {
357
+ options.create_if_missing = true;
358
+ this->opened_options_.options_ = options;
359
+ this->opened_options_.cf_options_.compression = rocksdb::kSnappyCompression;
360
+ }
176
361
 
177
- if (py_options != nullptr) {
178
- actual_options = py_options->options_;
362
+ std::vector<std::string> cf_names;
363
+ rocksdb::Status s = rocksdb::DB::ListColumnFamilies(options, path, &cf_names);
364
+
365
+ std::vector<rocksdb::ColumnFamilyDescriptor> cf_descriptors;
366
+ if (s.IsNotFound() || s.IsIOError()) {
367
+ if (options.create_if_missing) {
368
+ cf_descriptors.push_back(rocksdb::ColumnFamilyDescriptor(rocksdb::kDefaultColumnFamilyName, this->opened_options_.cf_options_));
369
+ } else {
370
+ throw RocksDBException("Database not found at " + path + " and create_if_missing is false.");
371
+ }
372
+ } else if (s.ok()) {
373
+ for (const auto& name : cf_names) {
374
+ cf_descriptors.push_back(rocksdb::ColumnFamilyDescriptor(name, this->opened_options_.cf_options_));
375
+ }
179
376
  } else {
180
- actual_options.create_if_missing = true;
377
+ throw RocksDBException("Failed to list column families at " + path + ": " + s.ToString());
181
378
  }
182
379
 
183
- opened_options_.options_ = actual_options;
380
+ std::vector<rocksdb::ColumnFamilyHandle*> handles;
381
+ rocksdb::Status s_open;
184
382
 
185
- rocksdb::Status status = rocksdb::DB::Open(actual_options, path_, &db_);
383
+ if (read_only) {
384
+ s_open = rocksdb::DB::OpenForReadOnly(options, path, cf_descriptors, &handles, &this->db_);
385
+ } else {
386
+ s_open = rocksdb::DB::Open(options, path, cf_descriptors, &handles, &this->db_);
387
+ }
388
+
389
+ if (!s_open.ok()) {
390
+ throw RocksDBException("Failed to open RocksDB at " + path + ": " + s.ToString());
391
+ }
186
392
 
187
- if (!status.ok()) {
188
- throw RocksDBException("Failed to open RocksDB at " + path_ + ": " + status.ToString());
393
+ for (size_t i = 0; i < handles.size(); ++i) {
394
+ const std::string& cf_name = cf_descriptors[i].name;
395
+ this->cf_handles_[cf_name] = std::make_shared<PyColumnFamilyHandle>(handles[i], cf_name);
396
+
397
+ if (cf_name == rocksdb::kDefaultColumnFamilyName) {
398
+ this->default_cf_handle_ = handles[i];
399
+ }
189
400
  }
401
+
402
+ if (!this->default_cf_handle_) {
403
+ throw RocksDBException("Default column family not found after opening.");
404
+ }
405
+ }
190
406
 
191
- std::cout << "RocksDB opened successfully at: " << path_ << std::endl;
407
+ void put_cf(PyColumnFamilyHandle& cf, const py::bytes& key, const py::bytes& value, std::shared_ptr<PyWriteOptions> write_options = nullptr) {
408
+ check_db_open();
409
+ check_read_only();
410
+ if (!cf.is_valid()) throw RocksDBException("ColumnFamilyHandle is invalid.");
411
+ rocksdb::Slice key_slice(static_cast<std::string_view>(key));
412
+ rocksdb::Slice value_slice(static_cast<std::string_view>(value));
413
+ const auto& opts = write_options ? write_options->options_ : default_write_options_->options_;
414
+ rocksdb::Status s = db_->Put(opts, cf.cf_handle_, key_slice, value_slice);
415
+ if (!s.ok()) throw RocksDBException("put_cf failed: " + s.ToString());
192
416
  }
193
417
 
194
- ~PyRocksDB() {
195
- if (db_ != nullptr) {
196
- // IMPROVEMENT: Use the stored path_ member for a reliable close message.
197
- std::cout << "DEBUG: Closing RocksDB database at " << path_ << std::endl;
198
- delete db_;
199
- db_ = nullptr;
418
+ py::object get_cf(PyColumnFamilyHandle& cf, const py::bytes& key, std::shared_ptr<PyReadOptions> read_options = nullptr) {
419
+ check_db_open();
420
+ if (!cf.is_valid()) throw RocksDBException("ColumnFamilyHandle is invalid.");
421
+ rocksdb::Slice key_slice(static_cast<std::string_view>(key));
422
+ std::string value_str;
423
+ const auto& opts = read_options ? read_options->options_ : default_read_options_->options_;
424
+ rocksdb::Status s = db_->Get(opts, cf.cf_handle_, key_slice, &value_str);
425
+ if (s.ok()) return py::bytes(value_str);
426
+ if (s.IsNotFound()) return py::none();
427
+ throw RocksDBException("get_cf failed: " + s.ToString());
428
+ }
429
+
430
+ void del_cf(PyColumnFamilyHandle& cf, const py::bytes& key, std::shared_ptr<PyWriteOptions> write_options = nullptr) {
431
+ check_db_open();
432
+ check_read_only();
433
+ if (!cf.is_valid()) throw RocksDBException("ColumnFamilyHandle is invalid.");
434
+ rocksdb::Slice key_slice(static_cast<std::string_view>(key));
435
+ const auto& opts = write_options ? write_options->options_ : default_write_options_->options_;
436
+ rocksdb::Status s = db_->Delete(opts, cf.cf_handle_, key_slice);
437
+ if (!s.ok()) throw RocksDBException("del_cf failed: " + s.ToString());
438
+ }
439
+
440
+ std::vector<std::string> list_column_families() {
441
+ check_db_open();
442
+ std::vector<std::string> names;
443
+ for (const auto& pair : cf_handles_) {
444
+ names.push_back(pair.first);
200
445
  }
446
+ return names;
201
447
  }
202
448
 
203
- void put(const py::bytes& key_bytes, const py::bytes& value_bytes) {
204
- rocksdb::Status status = db_->Put(rocksdb::WriteOptions(),
205
- static_cast<std::string>(key_bytes),
206
- static_cast<std::string>(value_bytes));
207
- if (!status.ok()) {
208
- throw RocksDBException("Failed to put key-value pair: " + status.ToString());
449
+ std::shared_ptr<PyColumnFamilyHandle> create_column_family(const std::string& name, PyOptions* cf_py_options) {
450
+ check_db_open();
451
+ check_read_only();
452
+ if (cf_handles_.count(name)) {
453
+ throw RocksDBException("Column family '" + name + "' already exists.");
209
454
  }
455
+ rocksdb::ColumnFamilyOptions cf_opts = cf_py_options ? cf_py_options->cf_options_ : opened_options_.cf_options_;
456
+ rocksdb::ColumnFamilyHandle* cf_handle;
457
+ rocksdb::Status s = db_->CreateColumnFamily(cf_opts, name, &cf_handle);
458
+ if (!s.ok()) throw RocksDBException("Failed to create column family '" + name + "': " + s.ToString());
459
+
460
+ auto new_handle = std::make_shared<PyColumnFamilyHandle>(cf_handle, name);
461
+ cf_handles_[name] = new_handle;
462
+ return new_handle;
210
463
  }
211
464
 
212
- py::object get(const py::bytes& key_bytes) {
213
- std::string value_str;
214
- rocksdb::Status status = db_->Get(rocksdb::ReadOptions(), static_cast<std::string>(key_bytes), &value_str);
465
+ void drop_column_family(PyColumnFamilyHandle& cf_handle) {
466
+ check_db_open();
467
+ check_read_only();
468
+ if (!cf_handle.is_valid()) throw RocksDBException("ColumnFamilyHandle is invalid.");
469
+ if (cf_handle.get_name() == rocksdb::kDefaultColumnFamilyName) throw RocksDBException("Cannot drop the default column family.");
215
470
 
216
- if (status.ok()) {
217
- return py::bytes(value_str);
218
- } else if (status.IsNotFound()) {
219
- return py::none();
220
- } else {
221
- throw RocksDBException("Failed to get value for key: " + status.ToString());
471
+ rocksdb::ColumnFamilyHandle* raw_handle = cf_handle.cf_handle_;
472
+ std::string cf_name = cf_handle.get_name();
473
+
474
+ rocksdb::Status s = db_->DropColumnFamily(raw_handle);
475
+ if (!s.ok()) throw RocksDBException("Failed to drop column family '" + cf_name + "': " + s.ToString());
476
+
477
+ s = db_->DestroyColumnFamilyHandle(raw_handle);
478
+ if (!s.ok()) throw RocksDBException("Dropped CF but failed to destroy handle: " + s.ToString());
479
+
480
+ cf_handles_.erase(cf_name);
481
+ cf_handle.cf_handle_ = nullptr;
482
+ }
483
+
484
+ std::shared_ptr<PyColumnFamilyHandle> get_column_family(const std::string& name) {
485
+ check_db_open();
486
+ auto it = cf_handles_.find(name);
487
+ return (it == cf_handles_.end()) ? nullptr : it->second;
488
+ }
489
+
490
+ std::shared_ptr<PyColumnFamilyHandle> get_default_cf() {
491
+ check_db_open();
492
+ return get_column_family(rocksdb::kDefaultColumnFamilyName);
493
+ }
494
+
495
+ std::shared_ptr<PyRocksDBIterator> new_cf_iterator(PyColumnFamilyHandle& cf_handle, std::shared_ptr<PyReadOptions> read_options = nullptr) {
496
+ check_db_open();
497
+ if (!cf_handle.is_valid()) throw RocksDBException("ColumnFamilyHandle is invalid.");
498
+
499
+ const auto& opts = read_options ? read_options->options_ : default_read_options_->options_;
500
+ rocksdb::Iterator* raw_iter = db_->NewIterator(opts, cf_handle.cf_handle_);
501
+ {
502
+ std::lock_guard<std::mutex> lock(active_iterators_mutex_);
503
+ active_rocksdb_iterators_.insert(raw_iter);
222
504
  }
505
+ return std::make_shared<PyRocksDBIterator>(raw_iter, shared_from_this());
223
506
  }
507
+ };
224
508
 
225
- // This method is safe to return by value, as PyOptions is a simple
226
- // wrapper around a copyable rocksdb::Options object.
227
- PyOptions get_options() const {
228
- return opened_options_;
509
+ // --- PyRocksDBIterator Method Implementations ---
510
+ PyRocksDBIterator::PyRocksDBIterator(rocksdb::Iterator* it, std::shared_ptr<PyRocksDB> parent_db)
511
+ : it_raw_ptr_(it), parent_db_ptr_(std::move(parent_db)) {
512
+ if (!it_raw_ptr_) {
513
+ throw RocksDBException("Failed to create iterator: null pointer received.");
229
514
  }
515
+ }
230
516
 
231
- void write(PyWriteBatch& py_write_batch) {
232
- rocksdb::Status status = db_->Write(rocksdb::WriteOptions(), &py_write_batch.wb_);
233
- if (!status.ok()) {
234
- throw RocksDBException("Failed to write batch: " + status.ToString());
517
+ PyRocksDBIterator::~PyRocksDBIterator() {
518
+ if (parent_db_ptr_) {
519
+ std::lock_guard<std::mutex> lock(parent_db_ptr_->active_iterators_mutex_);
520
+ if (it_raw_ptr_ && parent_db_ptr_->active_rocksdb_iterators_.count(it_raw_ptr_)) {
521
+ parent_db_ptr_->active_rocksdb_iterators_.erase(it_raw_ptr_);
522
+ delete it_raw_ptr_;
235
523
  }
236
524
  }
525
+ it_raw_ptr_ = nullptr;
526
+ }
527
+
528
+ void PyRocksDBIterator::check_parent_db_is_open() const {
529
+ if (!parent_db_ptr_ || parent_db_ptr_->is_closed_.load()) {
530
+ throw RocksDBException("Database is closed.");
531
+ }
532
+ }
237
533
 
238
- // **FIXED**: Return a unique_ptr to transfer ownership to pybind11.
239
- // This prevents the temporary iterator object from being destroyed prematurely.
240
- std::unique_ptr<PyRocksDBIterator> new_iterator() {
241
- rocksdb::ReadOptions read_options;
242
- return std::make_unique<PyRocksDBIterator>(db_->NewIterator(read_options));
534
+ bool PyRocksDBIterator::valid() { check_parent_db_is_open(); return it_raw_ptr_->Valid(); }
535
+ void PyRocksDBIterator::seek_to_first() { check_parent_db_is_open(); it_raw_ptr_->SeekToFirst(); }
536
+ void PyRocksDBIterator::seek_to_last() { check_parent_db_is_open(); it_raw_ptr_->SeekToLast(); }
537
+ void PyRocksDBIterator::seek(const py::bytes& key) { check_parent_db_is_open(); it_raw_ptr_->Seek(static_cast<std::string>(key)); }
538
+ void PyRocksDBIterator::next() { check_parent_db_is_open(); it_raw_ptr_->Next(); }
539
+ void PyRocksDBIterator::prev() { check_parent_db_is_open(); it_raw_ptr_->Prev(); }
540
+
541
+ py::object PyRocksDBIterator::key() {
542
+ check_parent_db_is_open();
543
+ if (it_raw_ptr_ && it_raw_ptr_->Valid()) {
544
+ return py::bytes(it_raw_ptr_->key().ToString());
243
545
  }
244
- };
546
+ return py::none();
547
+ }
548
+
549
+ py::object PyRocksDBIterator::value() {
550
+ check_parent_db_is_open();
551
+ if (it_raw_ptr_ && it_raw_ptr_->Valid()) {
552
+ return py::bytes(it_raw_ptr_->value().ToString());
553
+ }
554
+ return py::none();
555
+ }
556
+
557
+ void PyRocksDBIterator::check_status() {
558
+ check_parent_db_is_open();
559
+ if (it_raw_ptr_) {
560
+ rocksdb::Status s = it_raw_ptr_->status();
561
+ if (!s.ok()) throw RocksDBException("Iterator error: " + s.ToString());
562
+ }
563
+ }
245
564
 
246
565
  // --- PYBIND11 MODULE DEFINITION ---
247
566
  PYBIND11_MODULE(_pyrex, m) {
248
- m.doc() = "pybind11 RocksDB wrapper";
249
-
250
- py::register_exception<RocksDBException>(m, "RocksDBException");
251
-
252
- py::enum_<rocksdb::CompressionType>(m, "CompressionType")
253
- .value("kNoCompression", rocksdb::kNoCompression)
254
- .value("kSnappyCompression", rocksdb::kSnappyCompression)
255
- .value("kBZip2Compression", rocksdb::kBZip2Compression)
256
- .value("kLZ4Compression", rocksdb::kLZ4Compression)
257
- .value("kLZ4HCCompression", rocksdb::kLZ4HCCompression)
258
- .value("kXpressCompression", rocksdb::kXpressCompression)
259
- .value("kZSTD", rocksdb::kZSTD)
260
- .value("kDisableCompressionOption", rocksdb::kDisableCompressionOption)
261
- .export_values();
262
-
263
- py::class_<PyOptions>(m, "PyOptions")
264
- .def(py::init<>())
265
- .def_property("create_if_missing", &PyOptions::get_create_if_missing, &PyOptions::set_create_if_missing)
266
- .def_property("error_if_exists", &PyOptions::get_error_if_exists, &PyOptions::set_error_if_exists)
267
- .def_property("max_open_files", &PyOptions::get_max_open_files, &PyOptions::set_max_open_files)
268
- .def_property("write_buffer_size", &PyOptions::get_write_buffer_size, &PyOptions::set_write_buffer_size)
269
- .def_property("compression", &PyOptions::get_compression, &PyOptions::set_compression)
270
- .def_property("max_background_jobs", &PyOptions::get_max_background_jobs, &PyOptions::set_max_background_jobs)
271
- .def("increase_parallelism", &PyOptions::increase_parallelism, py::arg("total_threads"))
272
- .def("optimize_for_small_db", &PyOptions::optimize_for_small_db)
273
- .def("use_block_based_bloom_filter", &PyOptions::use_block_based_bloom_filter, py::arg("bits_per_key") = 10.0);
274
-
275
- py::class_<PyWriteBatch>(m, "PyWriteBatch")
276
- .def(py::init<>())
277
- .def("put", &PyWriteBatch::put, py::arg("key"), py::arg("value"))
278
- .def("delete", &PyWriteBatch::del, py::arg("key"))
279
- .def("clear", &PyWriteBatch::clear);
280
-
281
- py::class_<PyRocksDBIterator>(m, "PyRocksDBIterator")
282
- .def("valid", &PyRocksDBIterator::valid)
283
- .def("seek_to_first", &PyRocksDBIterator::seek_to_first)
284
- .def("seek_to_last", &PyRocksDBIterator::seek_to_last)
285
- .def("seek", &PyRocksDBIterator::seek, py::arg("key"))
286
- .def("next", &PyRocksDBIterator::next)
287
- .def("prev", &PyRocksDBIterator::prev)
288
- .def("key", &PyRocksDBIterator::key)
289
- .def("value", &PyRocksDBIterator::value)
290
- .def("check_status", &PyRocksDBIterator::check_status);
291
-
292
- py::class_<PyRocksDB>(m, "PyRocksDB")
293
- .def(py::init<const std::string&, PyOptions*>(), py::arg("path"), py::arg("options") = nullptr)
294
- .def("put", &PyRocksDB::put, py::arg("key"), py::arg("value"))
295
- .def("get", &PyRocksDB::get, py::arg("key"))
296
- .def("get_options", &PyRocksDB::get_options)
297
- .def("write", &PyRocksDB::write, py::arg("write_batch"))
298
- .def("new_iterator", &PyRocksDB::new_iterator,
299
- "Creates and returns a new RocksDB iterator.",
300
- py::keep_alive<0, 1>()
301
- );
567
+ m.doc() = R"doc(
568
+ A robust, high-performance Python wrapper for the RocksDB key-value store.
569
+
570
+ This module provides two main classes for interacting with RocksDB:
571
+ 1. PyRocksDB: A simple interface for standard key-value operations on a
572
+ database with a single (default) column family.
573
+ 2. PyRocksDBExtended: An advanced interface that inherits from PyRocksDB and
574
+ adds full support for creating, managing, and using multiple Column Families.
575
+ )doc";
576
+
577
+ // 1. Create the Python exception type and give it a docstring.
578
+ static py::exception<RocksDBException> rocksdb_exception(m, "RocksDBException", PyExc_RuntimeError);
579
+ rocksdb_exception.doc() = R"doc(
580
+ Custom exception raised for RocksDB-specific operational errors.
581
+
582
+ This exception is raised when a RocksDB operation fails for reasons
583
+ such as I/O errors, corruption, invalid arguments, or when an operation
584
+ is attempted on a closed database.
585
+ )doc";
586
+
587
+ // 2. Register a translator that maps the C++ exception to the Python one.
588
+ py::register_exception_translator([](std::exception_ptr p) {
589
+ try {
590
+ if (p) {
591
+ std::rethrow_exception(p);
592
+ }
593
+ } catch (const RocksDBException &e) {
594
+ // Use PyErr_SetString to set the Python error object correctly.
595
+ // 'rocksdb_exception' is a static variable and accessible without capture.
596
+ PyErr_SetString(rocksdb_exception.ptr(), e.what());
597
+ }
598
+ });
599
+
600
+
601
+ py::enum_<rocksdb::CompressionType>(m, "CompressionType", R"doc(
602
+ Enum for different compression types supported by RocksDB.
603
+ )doc")
604
+ .value("kNoCompression", rocksdb::kNoCompression, "No compression.")
605
+ .value("kSnappyCompression", rocksdb::kSnappyCompression, "Snappy compression (default).")
606
+ .value("kBZip2Compression", rocksdb::kBZip2Compression, "BZip2 compression.")
607
+ .value("kLZ4Compression", rocksdb::kLZ4Compression, "LZ4 compression.")
608
+ .value("kLZ4HCCompression", rocksdb::kLZ4HCCompression, "LZ4HC (high compression) compression.")
609
+ .value("kXpressCompression", rocksdb::kXpressCompression, "Xpress compression.")
610
+ .value("kZSTD", rocksdb::kZSTD, "Zstandard compression.");
611
+
612
+ py::class_<PyReadOptions, std::shared_ptr<PyReadOptions>>(m, "ReadOptions", R"doc(
613
+ Configuration options for read operations (Get, Iterator).
614
+ )doc")
615
+ .def(py::init<>(), "Constructs a new ReadOptions object with default settings.")
616
+ .def_property("fill_cache", &PyReadOptions::get_fill_cache, &PyReadOptions::set_fill_cache, "If True, reads will fill the block cache. Defaults to True.")
617
+ .def_property("verify_checksums", &PyReadOptions::get_verify_checksums, &PyReadOptions::set_verify_checksums, "If True, all data read from underlying storage will be verified against its checksums. Defaults to True.");
618
+
619
+ py::class_<PyWriteOptions, std::shared_ptr<PyWriteOptions>>(m, "WriteOptions", R"doc(
620
+ Configuration options for write operations (Put, Delete, Write).
621
+ )doc")
622
+ .def(py::init<>(), "Constructs a new WriteOptions object with default settings.")
623
+ .def_property("sync", &PyWriteOptions::get_sync, &PyWriteOptions::set_sync, "If True, the write will be flushed from the OS buffer cache before the write is considered complete. Defaults to False.")
624
+ .def_property("disable_wal", &PyWriteOptions::get_disable_wal, &PyWriteOptions::set_disable_wal, "If True, writes will not be written to the Write Ahead Log. Defaults to False.");
625
+
626
+ py::class_<PyOptions>(m, "PyOptions", R"doc(
627
+ Configuration options for opening and managing a RocksDB database.
628
+
629
+ This class wraps `rocksdb::Options` and `rocksdb::ColumnFamilyOptions`
630
+ to provide a convenient way to configure database behavior from Python.
631
+ )doc")
632
+ .def(py::init<>(), "Constructs a new PyOptions object with default settings.")
633
+ .def_property("create_if_missing", &PyOptions::get_create_if_missing, &PyOptions::set_create_if_missing, "If True, the database will be created if it is missing. Defaults to True.")
634
+ .def_property("error_if_exists", &PyOptions::get_error_if_exists, &PyOptions::set_error_if_exists, "If True, an error is raised if the database already exists. Defaults to False.")
635
+ .def_property("max_open_files", &PyOptions::get_max_open_files, &PyOptions::set_max_open_files, "Number of open files that can be used by the DB. Defaults to -1 (unlimited).")
636
+ .def_property("write_buffer_size", &PyOptions::get_write_buffer_size, &PyOptions::set_write_buffer_size, "Amount of data to build up in a memory buffer (MemTable) before flushing. Defaults to 64MB.")
637
+ .def_property("compression", &PyOptions::get_compression, &PyOptions::set_compression, "The compression type to use for sst files. Defaults to Snappy.")
638
+ .def_property("max_background_jobs", &PyOptions::get_max_background_jobs, &PyOptions::set_max_background_jobs, "Maximum number of concurrent background jobs (compactions and flushes).")
639
+ .def("increase_parallelism", &PyOptions::increase_parallelism, py::arg("total_threads"), R"doc(
640
+ Increases RocksDB's parallelism by tuning background threads.
641
+
642
+ Args:
643
+ total_threads (int): The total number of background threads to use.
644
+ )doc", py::call_guard<py::gil_scoped_release>())
645
+ .def("optimize_for_small_db", &PyOptions::optimize_for_small_db, R"doc(
646
+ Optimizes RocksDB for small databases by reducing memory and CPU consumption.
647
+ )doc", py::call_guard<py::gil_scoped_release>())
648
+ .def("use_block_based_bloom_filter", &PyOptions::use_block_based_bloom_filter, py::arg("bits_per_key") = 10.0, R"doc(
649
+ Enables a Bloom filter for block-based tables to speed up 'Get' operations.
650
+
651
+ Args:
652
+ bits_per_key (float): The number of bits per key for the Bloom filter.
653
+ Higher values reduce false positives but increase memory usage.
654
+ )doc", py::call_guard<py::gil_scoped_release>())
655
+ .def_property("cf_write_buffer_size", &PyOptions::get_cf_write_buffer_size, &PyOptions::set_cf_write_buffer_size, "Default write_buffer_size for newly created Column Families.")
656
+ .def_property("cf_compression", &PyOptions::get_cf_compression, &PyOptions::set_cf_compression, "Default compression type for newly created Column Families.");
657
+
658
+ py::class_<PyColumnFamilyHandle, std::shared_ptr<PyColumnFamilyHandle>>(m, "ColumnFamilyHandle", R"doc(
659
+ Represents a handle to a RocksDB Column Family.
660
+
661
+ This object is used to perform operations on a specific data partition
662
+ within a `PyRocksDBExtended` instance.
663
+ )doc")
664
+ .def_property_readonly("name", &PyColumnFamilyHandle::get_name, "The name of this column family.")
665
+ .def("is_valid", &PyColumnFamilyHandle::is_valid, "Checks if the handle is still valid (i.e., has not been dropped).");
666
+
667
+ py::class_<PyWriteBatch>(m, "PyWriteBatch", R"doc(
668
+ A batch of write operations (Put, Delete) that can be applied atomically.
669
+ )doc")
670
+ .def(py::init<>(), "Constructs an empty write batch.")
671
+ .def("put", &PyWriteBatch::put, py::arg("key"), py::arg("value"), "Adds a key-value pair to the batch for the default column family.")
672
+ .def("put_cf", &PyWriteBatch::put_cf, py::arg("cf_handle"), py::arg("key"), py::arg("value"), "Adds a key-value pair to the batch for a specific column family.")
673
+ .def("delete", &PyWriteBatch::del, py::arg("key"), "Adds a key deletion to the batch for the default column family.")
674
+ .def("delete_cf", &PyWriteBatch::del_cf, py::arg("cf_handle"), py::arg("key"), "Adds a key deletion to the batch for a specific column family.")
675
+ .def("merge", &PyWriteBatch::merge, py::arg("key"), py::arg("value"), "Adds a merge operation to the batch for the default column family.")
676
+ .def("merge_cf", &PyWriteBatch::merge_cf, py::arg("cf_handle"), py::arg("key"), py::arg("value"), "Adds a merge operation to the batch for a specific column family.")
677
+ .def("clear", &PyWriteBatch::clear, "Clears all operations from the batch.");
678
+
679
+ py::class_<PyRocksDBIterator, std::shared_ptr<PyRocksDBIterator>>(m, "PyRocksDBIterator", R"doc(
680
+ An iterator for traversing key-value pairs in a RocksDB database.
681
+ )doc")
682
+ .def("valid", &PyRocksDBIterator::valid, "Returns True if the iterator is currently positioned at a valid entry.", py::call_guard<py::gil_scoped_release>())
683
+ .def("seek_to_first", &PyRocksDBIterator::seek_to_first, "Positions the iterator at the first key.", py::call_guard<py::gil_scoped_release>())
684
+ .def("seek_to_last", &PyRocksDBIterator::seek_to_last, "Positions the iterator at the last key.", py::call_guard<py::gil_scoped_release>())
685
+ .def("seek", &PyRocksDBIterator::seek, py::arg("key"), "Positions the iterator at the first key >= the given key.", py::call_guard<py::gil_scoped_release>())
686
+ .def("next", &PyRocksDBIterator::next, "Moves the iterator to the next entry.", py::call_guard<py::gil_scoped_release>())
687
+ .def("prev", &PyRocksDBIterator::prev, "Moves the iterator to the previous entry.", py::call_guard<py::gil_scoped_release>())
688
+ .def("key", &PyRocksDBIterator::key, "Returns the current key as bytes, or None if invalid.")
689
+ .def("value", &PyRocksDBIterator::value, "Returns the current value as bytes, or None if invalid.")
690
+ .def("check_status", &PyRocksDBIterator::check_status, "Raises RocksDBException if an error occurred during iteration.", py::call_guard<py::gil_scoped_release>());
691
+
692
+ py::class_<PyRocksDB, std::shared_ptr<PyRocksDB>>(m, "PyRocksDB", R"doc(
693
+ A Python wrapper for RocksDB providing simple key-value storage.
694
+
695
+ This class interacts exclusively with the 'default' column family.
696
+ For multi-column-family support, use `PyRocksDBExtended`.
697
+ )doc")
698
+ .def(py::init<const std::string&, PyOptions*, bool>(),
699
+ py::arg("path"),
700
+ py::arg("options") = nullptr,
701
+ py::arg("read_only") = false,
702
+ R"doc(
703
+ Opens a RocksDB database at the specified path.
704
+
705
+ Args:
706
+ path (str): The file system path to the database.
707
+ options (PyOptions, optional): Custom options for configuration.
708
+ read_only (bool, optional): If True, opens the database in read-only mode.
709
+ Defaults to False.
710
+ )doc", py::call_guard<py::gil_scoped_release>())
711
+ .def("put", &PyRocksDB::put, py::arg("key"), py::arg("value"), py::arg("write_options") = nullptr, "Inserts a key-value pair.", py::call_guard<py::gil_scoped_release>())
712
+ .def("get", &PyRocksDB::get, py::arg("key"), py::arg("read_options") = nullptr, "Retrieves the value for a key.")
713
+ .def("delete", &PyRocksDB::del, py::arg("key"), py::arg("write_options") = nullptr, "Deletes a key.", py::call_guard<py::gil_scoped_release>())
714
+ .def("write", &PyRocksDB::write, py::arg("write_batch"), py::arg("write_options") = nullptr, "Applies a batch of operations atomically.", py::call_guard<py::gil_scoped_release>())
715
+ .def("new_iterator", &PyRocksDB::new_iterator, py::arg("read_options") = nullptr, "Creates a new iterator.", py::keep_alive<0, 1>())
716
+ .def("get_options", &PyRocksDB::get_options, "Returns the options the database was opened with.")
717
+ .def_property("default_read_options", &PyRocksDB::get_default_read_options, &PyRocksDB::set_default_read_options, "The default ReadOptions used for get and iterator operations.")
718
+ .def_property("default_write_options", &PyRocksDB::get_default_write_options, &PyRocksDB::set_default_write_options, "The default WriteOptions used for put, delete, and write operations.")
719
+ .def("close", &PyRocksDB::close, "Closes the database, releasing resources and the lock.", py::call_guard<py::gil_scoped_release>())
720
+ .def("__enter__", [](PyRocksDB &db) -> PyRocksDB& { return db; })
721
+ .def("__exit__", [](PyRocksDB &db, py::object /* type */, py::object /* value */, py::object /* traceback */) {
722
+ db.close();
723
+ });
724
+
725
+ py::class_<PyRocksDBExtended, PyRocksDB, std::shared_ptr<PyRocksDBExtended>>(m, "PyRocksDBExtended", R"doc(
726
+ An advanced Python wrapper for RocksDB with full Column Family support.
727
+ )doc")
728
+ .def(py::init<const std::string&, PyOptions*, bool>(),
729
+ py::arg("path"),
730
+ py::arg("options") = nullptr,
731
+ py::arg("read_only") = false,
732
+ R"doc(
733
+ Opens or creates a RocksDB database with Column Family support.
734
+
735
+ Args:
736
+ path (str): The file system path to the database.
737
+ options (PyOptions, optional): Custom options for configuration.
738
+ read_only (bool, optional): If True, opens the database in read-only mode.
739
+ Defaults to False.
740
+ )doc", py::call_guard<py::gil_scoped_release>())
741
+
742
+ .def("put_cf", &PyRocksDBExtended::put_cf, py::arg("cf_handle"), py::arg("key"), py::arg("value"), py::arg("write_options") = nullptr, "Inserts a key-value pair into a specific column family.", py::call_guard<py::gil_scoped_release>())
743
+ .def("get_cf", &PyRocksDBExtended::get_cf, py::arg("cf_handle"), py::arg("key"), py::arg("read_options") = nullptr, "Retrieves the value for a key from a specific column family.")
744
+ .def("delete_cf", &PyRocksDBExtended::del_cf, py::arg("cf_handle"), py::arg("key"), py::arg("write_options") = nullptr, "Deletes a key from a specific column family.", py::call_guard<py::gil_scoped_release>())
745
+ .def("list_column_families", &PyRocksDBExtended::list_column_families, "Lists the names of all existing column families.")
746
+ .def("create_column_family", &PyRocksDBExtended::create_column_family, py::arg("name"), py::arg("cf_options") = nullptr, "Creates a new column family.", py::call_guard<py::gil_scoped_release>())
747
+ .def("drop_column_family", &PyRocksDBExtended::drop_column_family, py::arg("cf_handle"), "Drops a column family.", py::call_guard<py::gil_scoped_release>())
748
+ .def("new_cf_iterator", &PyRocksDBExtended::new_cf_iterator, py::arg("cf_handle"), py::arg("read_options") = nullptr, "Creates a new iterator for a specific column family.", py::keep_alive<0, 1>())
749
+ .def("get_column_family", &PyRocksDBExtended::get_column_family, py::arg("name"), "Retrieves a ColumnFamilyHandle by its name.")
750
+ .def_property_readonly("default_cf", &PyRocksDBExtended::get_default_cf, "Returns the handle for the default column family.");
302
751
  }
752
+
Binary file
@@ -0,0 +1,121 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyrex-rocksdb
3
+ Version: 0.1.4
4
+ Summary: A fast RocksDB wrapper for Python using pybind11.
5
+ Author-email: Charilaos Mylonas <mylonas.charilaos@gmail.com>
6
+ Project-URL: Homepage, https://github.com/mylonasc/pyrex
7
+ Project-URL: Repository, https://github.com/mylonasc/pyrex
8
+ Keywords: rocksdb,database,key-value,pybind11
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.8
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: License :: OSI Approved :: Apache Software License
16
+ Classifier: Operating System :: POSIX :: Linux
17
+ Classifier: Operating System :: MacOS :: MacOS X
18
+ Classifier: Operating System :: Microsoft :: Windows
19
+ Classifier: Development Status :: 3 - Alpha
20
+ Classifier: Intended Audience :: Developers
21
+ Classifier: Topic :: Database
22
+ Requires-Python: >=3.8
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest; extra == "dev"
27
+ Requires-Dist: sphinx; extra == "dev"
28
+ Requires-Dist: sphinx-rtd-theme; extra == "dev"
29
+ Requires-Dist: cibuildwheel; extra == "dev"
30
+ Requires-Dist: pybind11-stubgen~=2.5.4; extra == "dev"
31
+ Requires-Dist: mypy~=1.10; extra == "dev"
32
+ Requires-Dist: twine; extra == "dev"
33
+ Dynamic: license-file
34
+
35
+
36
+ [![PyPI version](https://img.shields.io/pypi/v/pyrex-rocksdb.svg)](https://pypi.org/project/pyrex-rocksdb/)
37
+ [![Python versions](https://img.shields.io/pypi/pyversions/pyrex-rocksdb.svg)](https://img.shields.io/pypi/pyversions/pyrex-rocksdb/)
38
+
39
+
40
+ ![pyrex](https://raw.githubusercontent.com/mylonasc/pyrex/main/assets/logo.png)
41
+
42
+ # Installation
43
+
44
+
45
+ # pyrex-rocksdb
46
+ A python wrapper for the original (C++) version of RocksDB.
47
+
48
+ ## Installation
49
+
50
+ For linux systems, wheels are provided and can be installed from pypi using:
51
+
52
+ ```bash
53
+ pip install pyrex-rocksdb
54
+ ```
55
+
56
+ For Windows and MacOS I have built an earlier version of the library.
57
+ I will re-build once I include certain other important features in the API that are not yet implemented.
58
+
59
+
60
+
61
+ ## Motivation
62
+
63
+ This library is intended for providing a fast, write-optimized, in-process key value (KV) store in python. Therefore the "big brothers" of the database are the likes of MongoDB and Cassandra. The difference is that you don't need a separate server to run this (hence "in-process") and it is designed to be fairly portable.
64
+
65
+ RocksDB, which is the underlying storage engine of this database, is an LSM-tree engine. An LSM-tree is different from the ballanced tree index databases (e.g., [B-tree](https://en.wikipedia.org/wiki/B-tree)/ and [B+tree](https://en.wikipedia.org/wiki/B%2B_tree) databases). LSM-tree databases offer very high write throughputs and better space efficiency. See more about the motivation for LSM-tree databases (and RocksDB in particular) in [this talk](https://www.youtube.com/watch?v=V_C-T5S-w8g).
66
+
67
+ ### LSM-tree + SSTable engine basics
68
+ To understand where `pyrex` provides efficiency gains, it is important to understand some basics about the underlying `RocksDB` engine.
69
+
70
+ RocksDB and LevelDB are **key-value stores** with a **Log-Structured Merge-tree (LSM-tree)** architecture.
71
+
72
+ The key components of LSM-tree architectures are
73
+ * A **MemTable** that stores in-memory sorted data
74
+ * A set of **Sorted-String tables (SSTables)** which are immutable sorted files on disk where data from the MemTable is flushed
75
+ * The process of **Compaction**, which is a background process that merges the SSTables to remove redundant data and keep read performance high.
76
+
77
+ In such databases, fast writes create many small, sorted data files called SSTables. To prevent reads from slowing down by checking too many files, a background process called compaction merges these SSTables together. This process organizes the data into levels, where newer, overlapping files sit in Level 0 and are progressively merged into higher levels (Level 1, Level 2, etc.). Each higher level contains larger, non-overlapping files, which ensures that finding a key remains efficient and old data is purged to save space. There are several optimizations and configurations possible for these processes (configurability and "pluggability" are commonly cited RocksDB advantages).
78
+
79
+ However the main big advantage of RocksDB over LevelDB is its **multi-threaded compaction support** (LevelDB supports only single threaded compaction, which comes with significant performance limitations).
80
+ There are several other configurability advantages RocksDB offers over LevelDB. For a more elaborate enumaration of RocksDB advantages please refer to the [RocksDB wiki](https://github.com/facebook/rocksdb/wiki/Features-Not-in-LevelDB).
81
+
82
+ Not all are currently supported by the `pyrex` API, but I'm working on supporting more of them. Feel free to open an issue if there is a feature you want to see (or open a pull request).
83
+
84
+
85
+ ## Example usage:
86
+
87
+ Here is a simple example showing the usage of put/get in the DB:
88
+
89
+ ```python
90
+ import pyrex
91
+ import os
92
+ import shutil
93
+
94
+ DB_PATH = "./test_rocksdb_minimal"
95
+
96
+ with pyrex.PyRocksDB(DB_PATH) as db:
97
+ db.put(b"my_key", b"my_value")
98
+ retrieved_value = db.get(b"my_key")
99
+
100
+ print(f"Retrieved: {retrieved_value.decode()}") # Output: Retrieved: my_value
101
+
102
+ ```
103
+
104
+ for more examples check the relevant folder and the documentation.
105
+
106
+ ## Installation
107
+
108
+ <details>
109
+ <summary>Note on CICD</summary>
110
+ The wheels provided are not completely platform-independent at the moment.
111
+ I heavily rely on github actions to develop since I don't own mac or windows machines.
112
+ The CICD workflow for package builds is under development A windows/macos/linux build was successful, but further development is needed.
113
+ </details>
114
+
115
+ ## Benchmarks
116
+
117
+ `Pyrex` was benchmarked against [plyvel](https://github.com/wbolster/plyvel) and [lmdb](https://github.com/jnwatson/py-lmdb/) (which is based on a B+tree -- based architecture and relies on OS's block cache).
118
+
119
+ Initial benchmarks are promissing and to be reported soon.
120
+
121
+
@@ -1,13 +1,13 @@
1
1
  pyrex/__init__.py,sha256=zshmllNLYDo3znwRaknHeAd4obEkjS3jK6UkcXAsvw4,831
2
- pyrex/_pyrex.cpython-312-darwin.so,sha256=hkoAwtprFoZamx3W55FLQUTnZMD77RfPQjxiV6UEo6E,540080
3
- pyrex/_pyrex.cpp,sha256=mgzrP3PpeDVylK7pkjts2L8hv_tuiQ_Mfy-80r39o8I,10986
2
+ pyrex/_pyrex.cpython-312-darwin.so,sha256=p_vRANlbMHWUpKJBM5XNe0wOdv_aJ87UfPVCxPdvHkQ,821024
3
+ pyrex/_pyrex.cpp,sha256=rTj0pKRt3M1ttRYOhN53kTCoRnetmnuS93c9MMdtGvQ,38172
4
4
  pyrex/.dylibs/libsnappy.1.2.2.dylib,sha256=RtFJkVOnmB0FU6L5PGz-SRdNhutch4TzcNi-wB13m_0,79184
5
5
  pyrex/.dylibs/liblz4.1.10.0.dylib,sha256=k7TzQdjquM1GQIEVgXlSdUc9wfXAF7lCCMVqOWU0l2o,176960
6
6
  pyrex/.dylibs/librocksdb.10.4.2.dylib,sha256=ybpmZR-J9Hr88Nf_Bkl7ae8siEQPfn8A20dmzJl-X7U,11266240
7
7
  pyrex/.dylibs/libgflags.2.2.2.dylib,sha256=uiE_yjZcs48WAx0cx5VsLtCi-Jf1tip-QamzYNGTG70,172336
8
8
  pyrex/.dylibs/libzstd.1.5.7.dylib,sha256=c-Z4ifatarhfQRED6aESDQIMTHw8XxG_OYBBdVYdDVg,670240
9
- pyrex_rocksdb-0.1.0.dist-info/RECORD,,
10
- pyrex_rocksdb-0.1.0.dist-info/WHEEL,sha256=VrhWOWJdu4wN9IKhAFBqWPMo6yuww-SFg9GbWc0qbmI,136
11
- pyrex_rocksdb-0.1.0.dist-info/top_level.txt,sha256=0YbfttFoNSJjWKBullYqKklNMzgq7obw3oD751OmOOo,6
12
- pyrex_rocksdb-0.1.0.dist-info/METADATA,sha256=-5O5YaIogXmPdAxSHCo-XlghhcZzMAJyAcN-gYnfYf8,2830
13
- pyrex_rocksdb-0.1.0.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
9
+ pyrex_rocksdb-0.1.4.dist-info/RECORD,,
10
+ pyrex_rocksdb-0.1.4.dist-info/WHEEL,sha256=VrhWOWJdu4wN9IKhAFBqWPMo6yuww-SFg9GbWc0qbmI,136
11
+ pyrex_rocksdb-0.1.4.dist-info/top_level.txt,sha256=Hyct9jOureNtYbpQ0AUae8uzaC--sgtHGo5Kevyspgg,14
12
+ pyrex_rocksdb-0.1.4.dist-info/METADATA,sha256=EHT-tw_oAdy20FnOW2UAr6_N1CbK7ksowdUyyUef6qY,6050
13
+ pyrex_rocksdb-0.1.4.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
@@ -0,0 +1,2 @@
1
+ pyrex
2
+ rocksdb
@@ -1,76 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: pyrex-rocksdb
3
- Version: 0.1.0
4
- Summary: A fast RocksDB wrapper for Python using pybind11.
5
- Author-email: Charilaos Mylonas <mylonas.charilaos@gmail.com>
6
- Project-URL: Homepage, https://github.com/mylonasc/pyrex
7
- Project-URL: Repository, https://github.com/mylonasc/pyrex
8
- Keywords: rocksdb,database,key-value,pybind11
9
- Classifier: Programming Language :: Python :: 3
10
- Classifier: Programming Language :: Python :: 3.8
11
- Classifier: Programming Language :: Python :: 3.9
12
- Classifier: Programming Language :: Python :: 3.10
13
- Classifier: Programming Language :: Python :: 3.11
14
- Classifier: Programming Language :: Python :: 3.12
15
- Classifier: License :: OSI Approved :: Apache Software License
16
- Classifier: Operating System :: POSIX :: Linux
17
- Classifier: Operating System :: MacOS :: MacOS X
18
- Classifier: Operating System :: Microsoft :: Windows
19
- Classifier: Development Status :: 3 - Alpha
20
- Classifier: Intended Audience :: Developers
21
- Classifier: Topic :: Database
22
- Requires-Python: >=3.8
23
- Description-Content-Type: text/markdown
24
- License-File: LICENSE
25
- Provides-Extra: dev
26
- Requires-Dist: pytest; extra == "dev"
27
- Requires-Dist: sphinx; extra == "dev"
28
- Requires-Dist: sphinx-rtd-theme; extra == "dev"
29
- Requires-Dist: cibuildwheel; extra == "dev"
30
- Requires-Dist: twine; extra == "dev"
31
- Dynamic: license-file
32
-
33
- [![Build Status](https://github.com/mylonasc/pyrex/actions/workflows/build_wheels.yml/badge.svg)](https://github.com/mylonasc/pyrex/actions/workflows/build_wheels.yml)
34
-
35
- # pyrex
36
- a python rocksdb wrapper
37
-
38
- ## Motivation
39
- rocksdb python wrappers are broken. This is yet another attempt to create a working python wrapper for rocksdb.
40
-
41
- ## Example usage:
42
- Check the `test.py` file.
43
-
44
- ## Installation
45
-
46
- On Linux/macOS: Open your terminal, navigate to the parent directory of my_rocksdb_wrapper, and run:
47
-
48
-
49
- Build and Use the Wrapper:
50
- After saving the files, follow these steps to build and use your Python wrapper:
51
-
52
- ### Prerequisites:
53
-
54
- * RocksDB C++ Library Installed (headers and libraries accessible). (in Ubuntu `sudo apt-get install librocksdb` may suffice)
55
- * C++11 compatible compiler (e.g., g++ or clang++).
56
- * Python 3.7+ and its development headers.
57
-
58
- * Python pybind11 package: `pip install pybind11`
59
-
60
- Python setuptools package: `pip install --upgrade setuptools`
61
-
62
- ### Adjust setup.py (if needed):
63
-
64
- Open setup.py and verify that `include_dirs` and `library_dirs` correctly point to your RocksDB installation paths.
65
- If RocksDB is not in `/usr/local/include` or `/usr/local/lib`, update these paths.
66
-
67
- If RocksDB was built with specific compression libraries (like Snappy, Zlib, LZ4, Zstandard), add their corresponding names (e.g., 'snappy', 'z') to the libraries list.
68
-
69
- Compile the Wrapper:
70
- Navigate to the directory containing rocksdb_wrapper.cpp and setup.py in your terminal, and run:
71
-
72
- ```Bash
73
-
74
- python setup.py install
75
- ```
76
-
@@ -1 +0,0 @@
1
- pyrex