logosdb 0.7.8 → 0.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,131 @@
1
+ #pragma once
2
+
3
+ #include "platform.h"
4
+
5
+ #include <cstddef>
6
+ #include <cstdint>
7
+ #include <string>
8
+ #include <vector>
9
+
10
+ namespace logosdb
11
+ {
12
+ namespace internal
13
+ {
14
+
15
/// Precision used for each stored vector component.
/// The numeric values are written into the file header's `dtype` field, so
/// they are kept explicit.
enum StorageDtype
{
    /// 4 bytes per dimension (default).
    DTYPE_FLOAT32 = 0,
    /// 2 bytes per dimension.
    DTYPE_FLOAT16 = 1,
    /// 1 byte per dimension.
    DTYPE_INT8 = 2
};
22
+
23
// Fixed-stride binary vector storage with mmap support.
// File layout: [header (32 bytes)] [row_0] [row_1] ...
//
// Header (v2), fields in on-disk order:
//   uint32_t magic    = 0x4C4F474F ("LOGO")
//   uint32_t version  = 2
//   uint32_t dim
//   uint32_t dtype    (0 = float32, 1 = float16, 2 = int8)
//   uint64_t n_rows
//   float    scale    (for int8 quantization, 1.0 for others)
//   float    reserved (pads the header to 32 bytes)

struct StorageHeader
{
    uint32_t magic{0x4C4F474F};
    uint32_t version{2};
    uint32_t dim{0};
    uint32_t dtype{0};    // Holds a StorageDtype value
    uint64_t n_rows{0};
    float scale{1.0f};    // Used when dequantizing int8 rows
    float reserved{0.0f}; // Padding so the struct is exactly 32 bytes
};

// The struct is laid out to match the 32-byte header described above.
static_assert(sizeof(StorageHeader) == 32, "header must be 32 bytes");
46
+
47
+ // Get bytes per element for a given dtype
48
+ size_t dtype_size(StorageDtype dtype);
49
+
50
+ // Convert float32 to float16 (IEEE 754 half-precision)
51
+ uint16_t float32_to_float16(float f);
52
+
53
+ // Convert float16 to float32
54
+ float float16_to_float32(uint16_t h);
55
+
56
+ // Quantize float32 array to int8 with given scale
57
+ void quantize_float32_to_int8(const float* src, int8_t* dst, int dim, float scale);
58
+
59
+ // Dequantize int8 array to float32 with given scale
60
+ void dequantize_int8_to_float32(const int8_t* src, float* dst, int dim, float scale);
61
+
62
+ // Compute optimal int8 scale for a vector (max absolute value / 127.0f)
63
+ float compute_int8_scale(const float* vec, int dim);
64
+
65
+ class VectorStorage
66
+ {
67
+ public:
68
+ VectorStorage() = default;
69
+ ~VectorStorage();
70
+
71
+ VectorStorage(const VectorStorage&) = delete;
72
+ VectorStorage& operator=(const VectorStorage&) = delete;
73
+
74
+ // Open with explicit dtype (for creating new databases)
75
+ bool open(const std::string& path, int dim, StorageDtype dtype, std::string& err);
76
+
77
+ // Open with default float32 dtype (backward compatibility)
78
+ bool open(const std::string& path, int dim, std::string& err)
79
+ {
80
+ return open(path, dim, DTYPE_FLOAT32, err);
81
+ }
82
+
83
+ void close();
84
+
85
+ uint64_t append(const float* vec, int dim, std::string& err);
86
+
87
+ /* Append n vectors efficiently. Returns the starting id, or UINT64_MAX on error.
88
+ * 'data' must contain n * dim floats. */
89
+ uint64_t append_batch(const float* data, int n, int dim, std::string& err);
90
+
91
+ size_t n_rows() const { return header_.n_rows; }
92
+ int dim() const { return (int)header_.dim; }
93
+ StorageDtype dtype() const { return static_cast<StorageDtype>(header_.dtype); }
94
+
95
+ // Pointer to the i-th row (valid while file is open/mapped).
96
+ // Note: For reduced precision, this returns a pointer to the raw storage.
97
+ // Use row_to_float32() to get dequantized data.
98
+ const void* row_raw(uint64_t idx) const;
99
+
100
+ // Get a row and dequantize to float32 (caller must provide buffer of size dim)
101
+ void row_to_float32(uint64_t idx, float* out) const;
102
+
103
+ // Pointer to the start of all vector data (for bulk tensor load).
104
+ // Note: This is raw storage, not dequantized.
105
+ const void* data_raw() const;
106
+
107
+ // Dequantize all vectors to float32 buffer
108
+ void data_to_float32(float* out) const;
109
+
110
+ bool sync(std::string& err);
111
+
112
+ private:
113
+ bool remap(std::string& err);
114
+ void unmap();
115
+ bool reserve_mapping(size_t min_size, std::string& err);
116
+ bool extend_mapping_if_needed(std::string& err);
117
+ size_t row_stride_bytes() const;
118
+
119
+ std::string path_;
120
+ int fd_ = -1;
121
+ StorageHeader header_ = {};
122
+ uint8_t* map_base_ = nullptr;
123
+ size_t map_size_ = 0; // Currently mapped size
124
+ size_t file_size_ = 0; // Current file size
125
+ size_t reserved_size_ = 0; // Reserved address space size
126
+ static constexpr size_t DEFAULT_RESERVE_SIZE = 1ULL << 30; // 1 GB reservation
127
+ platform::MappedFile platform_map_{}; // For Windows memory mapping
128
+ };
129
+
130
+ } // namespace internal
131
+ } // namespace logosdb
@@ -0,0 +1,570 @@
1
+ #include "wal.h"
2
+
3
+ #include <fcntl.h>
4
+ #include <sys/stat.h>
5
+
6
+ #include <cerrno>
7
+ #include <cstring>
8
+
9
+ // For pread/pwrite on older POSIX systems
10
+ #ifndef _WIN32
11
+ #ifndef _GNU_SOURCE
12
+ #define _GNU_SOURCE
13
+ #endif
14
+ #include <unistd.h>
15
+ #endif
16
+
17
+ #ifdef _WIN32
18
+ #include <io.h>
19
+ #endif
20
+
21
+ namespace logosdb
22
+ {
23
+ namespace internal
24
+ {
25
+
26
// File signature: the bytes "LOGW" when the value is stored little-endian.
static constexpr uint32_t WAL_MAGIC{0x57474F4Cu};
// On-disk format revision written right after the magic.
static constexpr uint32_t WAL_VERSION{1u};
28
+
29
+ WriteAheadLog::~WriteAheadLog()
30
+ {
31
+ close();
32
+ }
33
+
34
+ bool WriteAheadLog::open(const std::string& path, std::string& err)
35
+ {
36
+ close();
37
+ path_ = path;
38
+
39
+ #ifdef _WIN32
40
+ int flags = O_RDWR | O_CREAT | O_BINARY;
41
+ #else
42
+ int flags = O_RDWR | O_CREAT;
43
+ #endif
44
+ fd_ = ::open(path.c_str(), flags, 0644);
45
+ if (fd_ < 0)
46
+ {
47
+ err = std::string("wal open: ") + strerror(errno);
48
+ return false;
49
+ }
50
+
51
+ struct stat st;
52
+ if (fstat(fd_, &st) != 0)
53
+ {
54
+ err = std::string("wal fstat: ") + strerror(errno);
55
+ close();
56
+ return false;
57
+ }
58
+
59
+ if (st.st_size == 0)
60
+ {
61
+ // New file: write header
62
+ uint32_t header[2] = {WAL_MAGIC, WAL_VERSION};
63
+ #ifdef _WIN32
64
+ if (_write(fd_, header, sizeof(header)) != sizeof(header))
65
+ {
66
+ #else
67
+ if (::write(fd_, header, sizeof(header)) != sizeof(header))
68
+ {
69
+ #endif
70
+ err = std::string("wal write header: ") + strerror(errno);
71
+ close();
72
+ return false;
73
+ }
74
+ pending_count_ = 0;
75
+ }
76
+ else
77
+ {
78
+ // Existing file: validate header and count pending entries
79
+ uint32_t header[2];
80
+ #ifdef _WIN32
81
+ if (_lseeki64(fd_, 0, SEEK_SET) != 0 ||
82
+ _read(fd_, header, sizeof(header)) != sizeof(header))
83
+ {
84
+ #else
85
+ if (::pread(fd_, header, sizeof(header), 0) != sizeof(header))
86
+ {
87
+ #endif
88
+ err = std::string("wal read header: ") + strerror(errno);
89
+ close();
90
+ return false;
91
+ }
92
+ if (header[0] != WAL_MAGIC)
93
+ {
94
+ err = "wal: bad magic";
95
+ close();
96
+ return false;
97
+ }
98
+ if (header[1] != WAL_VERSION)
99
+ {
100
+ err = "wal: version mismatch";
101
+ close();
102
+ return false;
103
+ }
104
+
105
+ // Scan for pending entries
106
+ int64_t offset = sizeof(header);
107
+ WALEntry entry;
108
+ while (true)
109
+ {
110
+ if (!read_entry_at(offset, entry, err))
111
+ {
112
+ if (err.empty())
113
+ break; // EOF
114
+ close();
115
+ return false;
116
+ }
117
+ if (entry.state == WALState::PENDING)
118
+ {
119
+ ++pending_count_;
120
+ }
121
+ // Calculate next entry offset
122
+ offset += 1; // state byte
123
+ offset += 4; // dim
124
+ offset += 4 + entry.vector.size() * sizeof(float); // vector len + data
125
+ offset += 4 + entry.text.size(); // text len + data
126
+ offset += 4 + entry.timestamp.size(); // ts len + data
127
+ offset += 8; // expected_id
128
+ }
129
+ err.clear();
130
+ }
131
+
132
+ return true;
133
+ }
134
+
135
+ void WriteAheadLog::close()
136
+ {
137
+ if (fd_ >= 0)
138
+ {
139
+ #ifdef _WIN32
140
+ _close(fd_);
141
+ #else
142
+ ::close(fd_);
143
+ #endif
144
+ fd_ = -1;
145
+ }
146
+ path_.clear();
147
+ pending_count_ = 0;
148
+ }
149
+
150
+ int64_t WriteAheadLog::append_pending(const float* vec,
151
+ int dim,
152
+ const char* text,
153
+ const char* timestamp,
154
+ uint64_t expected_id,
155
+ std::string& err)
156
+ {
157
+ if (fd_ < 0)
158
+ {
159
+ err = "wal not open";
160
+ return -1;
161
+ }
162
+
163
+ // Get current file position (where we'll write this entry)
164
+ #ifdef _WIN32
165
+ int64_t offset = _lseeki64(fd_, 0, SEEK_END);
166
+ #else
167
+ off_t offset = ::lseek(fd_, 0, SEEK_END);
168
+ #endif
169
+ if (offset < 0)
170
+ {
171
+ err = std::string("wal lseek: ") + strerror(errno);
172
+ return -1;
173
+ }
174
+
175
+ // Write state (PENDING)
176
+ uint8_t state = static_cast<uint8_t>(WALState::PENDING);
177
+ #ifdef _WIN32
178
+ if (_write(fd_, &state, 1) != 1)
179
+ {
180
+ #else
181
+ if (::write(fd_, &state, 1) != 1)
182
+ {
183
+ #endif
184
+ err = std::string("wal write state: ") + strerror(errno);
185
+ return -1;
186
+ }
187
+
188
+ // Write dim
189
+ uint32_t dim_u32 = static_cast<uint32_t>(dim);
190
+ #ifdef _WIN32
191
+ if (_write(fd_, &dim_u32, 4) != 4)
192
+ {
193
+ #else
194
+ if (::write(fd_, &dim_u32, 4) != 4)
195
+ {
196
+ #endif
197
+ err = std::string("wal write dim: ") + strerror(errno);
198
+ return -1;
199
+ }
200
+
201
+ // Write vector length and data
202
+ uint32_t vec_bytes = dim * sizeof(float);
203
+ #ifdef _WIN32
204
+ if (_write(fd_, &vec_bytes, 4) != 4)
205
+ {
206
+ err = std::string("wal write vec len: ") + strerror(errno);
207
+ return -1;
208
+ }
209
+ if (vec_bytes > 0 && _write(fd_, vec, vec_bytes) != vec_bytes)
210
+ {
211
+ #else
212
+ if (::write(fd_, &vec_bytes, 4) != 4)
213
+ {
214
+ err = std::string("wal write vec len: ") + strerror(errno);
215
+ return -1;
216
+ }
217
+ if (vec_bytes > 0 && ::write(fd_, vec, vec_bytes) != vec_bytes)
218
+ {
219
+ #endif
220
+ err = std::string("wal write vec data: ") + strerror(errno);
221
+ return -1;
222
+ }
223
+
224
+ // Write text
225
+ std::string t = text ? text : "";
226
+ uint32_t text_len = static_cast<uint32_t>(t.size());
227
+ #ifdef _WIN32
228
+ if (_write(fd_, &text_len, 4) != 4)
229
+ {
230
+ err = std::string("wal write text len: ") + strerror(errno);
231
+ return -1;
232
+ }
233
+ if (text_len > 0 && _write(fd_, t.data(), text_len) != text_len)
234
+ {
235
+ #else
236
+ if (::write(fd_, &text_len, 4) != 4)
237
+ {
238
+ err = std::string("wal write text len: ") + strerror(errno);
239
+ return -1;
240
+ }
241
+ if (text_len > 0 && ::write(fd_, t.data(), text_len) != text_len)
242
+ {
243
+ #endif
244
+ err = std::string("wal write text: ") + strerror(errno);
245
+ return -1;
246
+ }
247
+
248
+ // Write timestamp
249
+ std::string ts = timestamp ? timestamp : "";
250
+ uint32_t ts_len = static_cast<uint32_t>(ts.size());
251
+ #ifdef _WIN32
252
+ if (_write(fd_, &ts_len, 4) != 4)
253
+ {
254
+ err = std::string("wal write ts len: ") + strerror(errno);
255
+ return -1;
256
+ }
257
+ if (ts_len > 0 && _write(fd_, ts.data(), ts_len) != ts_len)
258
+ {
259
+ #else
260
+ if (::write(fd_, &ts_len, 4) != 4)
261
+ {
262
+ err = std::string("wal write ts len: ") + strerror(errno);
263
+ return -1;
264
+ }
265
+ if (ts_len > 0 && ::write(fd_, ts.data(), ts_len) != ts_len)
266
+ {
267
+ #endif
268
+ err = std::string("wal write ts: ") + strerror(errno);
269
+ return -1;
270
+ }
271
+
272
+ // Write expected_id
273
+ #ifdef _WIN32
274
+ if (_write(fd_, &expected_id, 8) != 8)
275
+ {
276
+ #else
277
+ if (::write(fd_, &expected_id, 8) != 8)
278
+ {
279
+ #endif
280
+ err = std::string("wal write expected_id: ") + strerror(errno);
281
+ return -1;
282
+ }
283
+
284
+ // Sync to ensure WAL entry is durable before we modify stores
285
+ if (!sync(err))
286
+ {
287
+ return -1;
288
+ }
289
+
290
+ ++pending_count_;
291
+ return offset;
292
+ }
293
+
294
+ bool WriteAheadLog::mark_committed(int64_t offset, std::string& err)
295
+ {
296
+ if (fd_ < 0)
297
+ {
298
+ err = "wal not open";
299
+ return false;
300
+ }
301
+
302
+ if (!write_state_at(offset, WALState::COMMITTED, err))
303
+ {
304
+ return false;
305
+ }
306
+
307
+ if (pending_count_ > 0)
308
+ --pending_count_;
309
+ return sync(err);
310
+ }
311
+
312
+ bool WriteAheadLog::write_state_at(int64_t offset, WALState state, std::string& err)
313
+ {
314
+ uint8_t state_byte = static_cast<uint8_t>(state);
315
+ #ifdef _WIN32
316
+ // Windows: seek + write (no pwrite)
317
+ if (_lseeki64(fd_, offset, SEEK_SET) != offset || _write(fd_, &state_byte, 1) != 1)
318
+ {
319
+ #else
320
+ if (::pwrite(fd_, &state_byte, 1, offset) != 1)
321
+ {
322
+ #endif
323
+ err = std::string("wal pwrite state: ") + strerror(errno);
324
+ return false;
325
+ }
326
+ return true;
327
+ }
328
+
329
+ bool WriteAheadLog::read_entry_at(int64_t offset, WALEntry& entry, std::string& err)
330
+ {
331
+ err.clear();
332
+
333
+ #ifdef _WIN32
334
+ // Windows: seek to offset before reading
335
+ if (_lseeki64(fd_, offset, SEEK_SET) != offset)
336
+ {
337
+ return false; // EOF or error
338
+ }
339
+ #endif
340
+
341
+ // Read state
342
+ uint8_t state_byte;
343
+ #ifdef _WIN32
344
+ if (_read(fd_, &state_byte, 1) != 1)
345
+ {
346
+ return false; // EOF or error
347
+ }
348
+ #else
349
+ if (::pread(fd_, &state_byte, 1, offset) != 1)
350
+ {
351
+ return false; // EOF or error
352
+ }
353
+ #endif
354
+ entry.state = static_cast<WALState>(state_byte);
355
+ offset += 1;
356
+
357
+ // Read dim
358
+ uint32_t dim;
359
+ #ifdef _WIN32
360
+ if (_read(fd_, &dim, 4) != 4)
361
+ {
362
+ #else
363
+ if (::pread(fd_, &dim, 4, offset) != 4)
364
+ {
365
+ #endif
366
+ err = "wal: truncated entry (dim)";
367
+ return false;
368
+ }
369
+ entry.dim = dim;
370
+ offset += 4;
371
+
372
+ // Read vector
373
+ uint32_t vec_bytes;
374
+ #ifdef _WIN32
375
+ if (_read(fd_, &vec_bytes, 4) != 4)
376
+ {
377
+ #else
378
+ if (::pread(fd_, &vec_bytes, 4, offset) != 4)
379
+ {
380
+ #endif
381
+ err = "wal: truncated entry (vec len)";
382
+ return false;
383
+ }
384
+ offset += 4;
385
+ if (vec_bytes > 0)
386
+ {
387
+ entry.vector.resize(vec_bytes / sizeof(float));
388
+ #ifdef _WIN32
389
+ if (_read(fd_, entry.vector.data(), vec_bytes) != vec_bytes)
390
+ {
391
+ #else
392
+ if (::pread(fd_, entry.vector.data(), vec_bytes, offset) != vec_bytes)
393
+ {
394
+ #endif
395
+ err = "wal: truncated entry (vec data)";
396
+ return false;
397
+ }
398
+ }
399
+ else
400
+ {
401
+ entry.vector.clear();
402
+ }
403
+ offset += vec_bytes;
404
+
405
+ // Read text
406
+ uint32_t text_len;
407
+ #ifdef _WIN32
408
+ if (_read(fd_, &text_len, 4) != 4)
409
+ {
410
+ #else
411
+ if (::pread(fd_, &text_len, 4, offset) != 4)
412
+ {
413
+ #endif
414
+ err = "wal: truncated entry (text len)";
415
+ return false;
416
+ }
417
+ offset += 4;
418
+ entry.text.resize(text_len);
419
+ if (text_len > 0)
420
+ {
421
+ #ifdef _WIN32
422
+ if (_read(fd_, &entry.text[0], text_len) != text_len)
423
+ {
424
+ #else
425
+ if (::pread(fd_, &entry.text[0], text_len, offset) != text_len)
426
+ {
427
+ #endif
428
+ err = "wal: truncated entry (text data)";
429
+ return false;
430
+ }
431
+ }
432
+ offset += text_len;
433
+
434
+ // Read timestamp
435
+ uint32_t ts_len;
436
+ #ifdef _WIN32
437
+ if (_read(fd_, &ts_len, 4) != 4)
438
+ {
439
+ #else
440
+ if (::pread(fd_, &ts_len, 4, offset) != 4)
441
+ {
442
+ #endif
443
+ err = "wal: truncated entry (ts len)";
444
+ return false;
445
+ }
446
+ offset += 4;
447
+ entry.timestamp.resize(ts_len);
448
+ if (ts_len > 0)
449
+ {
450
+ #ifdef _WIN32
451
+ if (_read(fd_, &entry.timestamp[0], ts_len) != ts_len)
452
+ {
453
+ #else
454
+ if (::pread(fd_, &entry.timestamp[0], ts_len, offset) != ts_len)
455
+ {
456
+ #endif
457
+ err = "wal: truncated entry (ts data)";
458
+ return false;
459
+ }
460
+ }
461
+ offset += ts_len;
462
+
463
+ // Read expected_id
464
+ #ifdef _WIN32
465
+ if (_read(fd_, &entry.expected_id, 8) != 8)
466
+ {
467
+ #else
468
+ if (::pread(fd_, &entry.expected_id, 8, offset) != 8)
469
+ {
470
+ #endif
471
+ err = "wal: truncated entry (expected_id)";
472
+ return false;
473
+ }
474
+
475
+ return true;
476
+ }
477
+
478
+ int WriteAheadLog::replay_pending(std::function<bool(const WALEntry&, std::string&)> replay_fn,
479
+ std::string& err)
480
+ {
481
+ if (fd_ < 0)
482
+ {
483
+ err = "wal not open";
484
+ return -1;
485
+ }
486
+
487
+ int64_t offset = 8; // Skip header (magic + version)
488
+ int replayed = 0;
489
+ WALEntry entry;
490
+
491
+ while (true)
492
+ {
493
+ // Peek at next entry state
494
+ uint8_t state_byte;
495
+ int64_t r = 0;
496
+ #ifdef _WIN32
497
+ r = _lseeki64(fd_, offset, SEEK_SET);
498
+ if (r != offset)
499
+ break;
500
+ r = _read(fd_, &state_byte, 1);
501
+ #else
502
+ r = static_cast<int64_t>(::pread(fd_, &state_byte, 1, offset));
503
+ #endif
504
+ if (r == 0)
505
+ break; // EOF
506
+ if (r < 0)
507
+ {
508
+ err = std::string("wal replay pread: ") + strerror(errno);
509
+ return -1;
510
+ }
511
+
512
+ // Read full entry
513
+ if (!read_entry_at(offset, entry, err))
514
+ {
515
+ if (err.empty())
516
+ break;
517
+ return -1;
518
+ }
519
+
520
+ // Only replay pending entries
521
+ if (entry.state == WALState::PENDING)
522
+ {
523
+ if (!replay_fn(entry, err))
524
+ {
525
+ return -1;
526
+ }
527
+ // Mark as committed after successful replay
528
+ if (!write_state_at(offset, WALState::COMMITTED, err))
529
+ {
530
+ return -1;
531
+ }
532
+ if (pending_count_ > 0)
533
+ --pending_count_;
534
+ ++replayed;
535
+ }
536
+
537
+ // Advance to next entry
538
+ offset += 1; // state
539
+ offset += 4; // dim
540
+ offset += 4 + entry.vector.size() * sizeof(float); // vector
541
+ offset += 4 + entry.text.size(); // text
542
+ offset += 4 + entry.timestamp.size(); // timestamp
543
+ offset += 8; // expected_id
544
+ }
545
+
546
+ return replayed;
547
+ }
548
+
549
+ bool WriteAheadLog::sync(std::string& err)
550
+ {
551
+ if (fd_ < 0)
552
+ {
553
+ err = "wal not open";
554
+ return false;
555
+ }
556
+ #ifdef _WIN32
557
+ if (_commit(fd_) != 0)
558
+ {
559
+ #else
560
+ if (::fsync(fd_) != 0)
561
+ {
562
+ #endif
563
+ err = std::string("wal fsync: ") + strerror(errno);
564
+ return false;
565
+ }
566
+ return true;
567
+ }
568
+
569
+ } // namespace internal
570
+ } // namespace logosdb