logosdb 0.7.8 → 0.7.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,764 @@
1
+ #include "storage.h"
2
+
3
+ #include "platform.h"
4
+
5
+ #include <fcntl.h>
6
+ #include <sys/stat.h>
7
+ #include <sys/types.h>
8
+
9
+ #include <cerrno>
10
+ #include <cmath>
11
+ #include <cstring>
12
+
13
+ #ifdef _WIN32
14
+ #include <io.h>
15
+ #else
16
+ #include <unistd.h>
17
+ #endif
18
+
19
+ namespace logosdb
20
+ {
21
+ namespace internal
22
+ {
23
+
24
+ /* ── Dtype utilities ─────────────────────────────────────────────────── */
25
+
26
+ size_t dtype_size(StorageDtype dtype)
27
+ {
28
+ switch (dtype)
29
+ {
30
+ case DTYPE_FLOAT16:
31
+ return 2;
32
+ case DTYPE_INT8:
33
+ return 1;
34
+ case DTYPE_FLOAT32:
35
+ default:
36
+ return 4;
37
+ }
38
+ }
39
+
40
// IEEE 754 float32 -> float16 (binary16) conversion.
//
// Returns the 16-bit pattern of the half-precision value:
//   - the 23-bit mantissa is truncated to 10 bits (no rounding is applied);
//   - magnitudes too large for half precision become +/-Inf;
//   - magnitudes below the smallest half denormal become signed zero;
//   - NaN always stays NaN, even when its payload lives only in the low
//     mantissa bits that truncation would discard.
uint16_t float32_to_float16(float f)
{
    // Bit-level reinterpretation. memcpy is used instead of a union because
    // reading an inactive union member is undefined behaviour in C++.
    uint32_t u = 0;
    std::memcpy(&u, &f, sizeof(u));

    const uint32_t sign_bits = ((u >> 31) & 0x1) << 15;
    const uint32_t exp = (u >> 23) & 0xFF;
    uint32_t mant = u & 0x7FFFFF;

    // Handle special cases
    if (exp == 0xFF)
    { // Inf or NaN
        if (mant != 0)
        {
            uint32_t payload = mant >> 13;
            if (payload == 0)
            {
                // All payload bits were in the truncated part; force the quiet
                // bit so the result is not mistaken for infinity.
                payload = 0x200;
            }
            return static_cast<uint16_t>(sign_bits | 0x7C00 | payload); // NaN
        }
        return static_cast<uint16_t>(sign_bits | 0x7C00); // Inf
    }

    // Re-bias the exponent from float32 (127) to float16 (15).
    const int32_t new_exp = static_cast<int32_t>(exp) - 127 + 15;

    if (new_exp >= 31)
    {
        // Overflow to infinity
        return static_cast<uint16_t>(sign_bits | 0x7C00);
    }
    if (new_exp <= 0)
    {
        // Underflow to zero or denormal
        if (new_exp < -10)
        {
            return static_cast<uint16_t>(sign_bits); // Zero
        }
        // Denormal: make the implicit leading 1 explicit, then shift it into
        // the position dictated by the (non-positive) exponent.
        mant = (mant | 0x800000) >> (1 - new_exp);
        return static_cast<uint16_t>(sign_bits | (mant >> 13));
    }

    // Normal case
    return static_cast<uint16_t>(sign_bits | (static_cast<uint32_t>(new_exp) << 10) |
                                 (mant >> 13));
}
86
+
87
// IEEE 754 float16 (binary16) -> float32 conversion.
//
// `h` is the raw 16-bit pattern. Every half-precision value is exactly
// representable as float32, so the conversion is lossless. NaN inputs produce
// a quiet NaN that carries the original payload bits.
float float16_to_float32(uint16_t h)
{
    const uint32_t sign = (h >> 15) & 0x1;
    const uint32_t exp = (h >> 10) & 0x1F;
    const uint32_t mant = h & 0x3FF;

    if (exp == 0)
    {
        if (mant == 0)
        {
            // Signed zero
            return sign ? -0.0f : 0.0f;
        }
        // Denormal: value = (mant / 2^10) * 2^-14.
        const float magnitude = (mant / 1024.0f) * 0.00006103515625f;
        return sign ? -magnitude : magnitude;
    }

    uint32_t bits = sign << 31;
    if (exp == 31)
    {
        // All-ones exponent: NaN when the mantissa is non-zero, otherwise Inf.
        bits |= (mant != 0) ? (0x7FC00000 | (mant << 13)) : 0x7F800000;
    }
    else
    {
        // Normal: re-bias the exponent (127 - 15 = 112) and widen the mantissa.
        bits |= ((exp + 112) << 23) | (mant << 13);
    }

    // memcpy instead of a union: reading an inactive union member is
    // undefined behaviour in C++.
    float out = 0.0f;
    std::memcpy(&out, &bits, sizeof(out));
    return out;
}
134
+
135
// Quantizes `dim` floats from `src` into signed 8-bit values in `dst`.
//
// Each component is mapped to round(src[i] * 127 / scale) and clamped to
// [-127, 127] (-128 is left unused). A `scale` of exactly 0 writes all zeros.
// Components whose scaled value is NaN are written as 0.
void quantize_float32_to_int8(const float* src, int8_t* dst, int dim, float scale)
{
    if (scale == 0.0f)
    {
        for (int i = 0; i < dim; ++i)
            dst[i] = 0;
        return;
    }

    const float inv_scale = 127.0f / scale;
    for (int i = 0; i < dim; ++i)
    {
        const float scaled = std::round(src[i] * inv_scale);

        // Clamp while still in floating point: converting an out-of-range or
        // NaN float to an integer type is undefined behaviour.
        int8_t q = 0; // value used when `scaled` is NaN
        if (scaled >= 127.0f)
        {
            q = 127;
        }
        else if (scaled <= -127.0f)
        {
            q = -127;
        }
        else if (!std::isnan(scaled))
        {
            q = static_cast<int8_t>(scaled);
        }
        dst[i] = q;
    }
}
155
+
156
// Expands `dim` quantized int8 components from `src` into floats in `dst`.
// Each output is src[i] * (scale / 127), the inverse of the mapping used by
// quantize_float32_to_int8 for the same `scale`.
void dequantize_int8_to_float32(const int8_t* src, float* dst, int dim, float scale)
{
    const float step = scale / 127.0f;
    const int8_t* in = src;
    float* out = dst;
    for (int remaining = dim; remaining > 0; --remaining)
    {
        *out++ = static_cast<float>(*in++) * step;
    }
}
164
+
165
// Returns the largest absolute component value among the first `dim` entries
// of `vec` (0 when `dim` is not positive). NaN components never compare
// greater than the running maximum and are therefore ignored.
float compute_int8_scale(const float* vec, int dim)
{
    float peak = 0.0f;
    int i = 0;
    while (i < dim)
    {
        const float magnitude = std::fabs(vec[i]);
        peak = (magnitude > peak) ? magnitude : peak;
        ++i;
    }
    return peak;
}
176
+
177
+ /* ── VectorStorage implementation ───────────────────────────────────── */
178
+
179
+ static size_t row_stride(int dim, StorageDtype dtype)
180
+ {
181
+ return (size_t)dim * dtype_size(dtype);
182
+ }
183
+
184
+ static bool checked_file_size(uint64_t n_rows, int dim, StorageDtype dtype, size_t& out)
185
+ {
186
+ size_t stride = row_stride(dim, dtype);
187
+ if (stride > 0 && n_rows > (SIZE_MAX - sizeof(StorageHeader)) / stride)
188
+ {
189
+ return false; // would overflow
190
+ }
191
+ out = sizeof(StorageHeader) + n_rows * stride;
192
+ return true;
193
+ }
194
+
195
// Platform primitives: each performs at most ONE positional transfer and may
// move fewer bytes than requested. They return the byte count, or a negative
// value on error.
#ifdef _WIN32
// _read/_write take an unsigned int count but report the result as int, so a
// single call is capped at INT_MAX bytes.
static const size_t kMaxIoChunk = 0x7FFFFFFF;

static int64_t file_pwrite_once(int fd, const void* buf, size_t count, size_t offset)
{
    if (_lseeki64(fd, static_cast<__int64>(offset), SEEK_SET) < 0)
    {
        return -1;
    }
    const size_t chunk = count < kMaxIoChunk ? count : kMaxIoChunk;
    return static_cast<int64_t>(_write(fd, buf, static_cast<unsigned int>(chunk)));
}

static int64_t file_pread_once(int fd, void* buf, size_t count, size_t offset)
{
    if (_lseeki64(fd, static_cast<__int64>(offset), SEEK_SET) < 0)
    {
        return -1;
    }
    const size_t chunk = count < kMaxIoChunk ? count : kMaxIoChunk;
    return static_cast<int64_t>(_read(fd, buf, static_cast<unsigned int>(chunk)));
}
#else
static int64_t file_pwrite_once(int fd, const void* buf, size_t count, size_t offset)
{
    return static_cast<int64_t>(::pwrite(fd, buf, count, static_cast<off_t>(offset)));
}

static int64_t file_pread_once(int fd, void* buf, size_t count, size_t offset)
{
    return static_cast<int64_t>(::pread(fd, buf, count, static_cast<off_t>(offset)));
}
#endif

// Writes `count` bytes from `buf` to `fd` starting at byte `offset`.
//
// Keeps issuing writes until everything is written, because one call may
// legally transfer fewer bytes than requested. Calls interrupted by a signal
// (EINTR) are retried. Returns the number of bytes written (== `count` on
// success; smaller only if the OS reports a zero-length write) or -1 on error.
static int64_t file_pwrite(int fd, const void* buf, size_t count, size_t offset)
{
    const char* bytes = static_cast<const char*>(buf);
    size_t done = 0;
    while (done < count)
    {
        const int64_t n = file_pwrite_once(fd, bytes + done, count - done, offset + done);
        if (n < 0)
        {
            if (errno == EINTR)
                continue;
            return -1;
        }
        if (n == 0)
            break; // no progress possible; report the short count
        done += static_cast<size_t>(n);
    }
    return static_cast<int64_t>(done);
}

// Reads up to `count` bytes from `fd` at byte `offset` into `buf`.
//
// Keeps issuing reads until `count` bytes have arrived or end-of-file is
// reached; EINTR is retried. Returns the number of bytes read (smaller than
// `count` only at end-of-file) or -1 on error.
static int64_t file_pread(int fd, void* buf, size_t count, size_t offset)
{
    char* bytes = static_cast<char*>(buf);
    size_t done = 0;
    while (done < count)
    {
        const int64_t n = file_pread_once(fd, bytes + done, count - done, offset + done);
        if (n < 0)
        {
            if (errno == EINTR)
                continue;
            return -1;
        }
        if (n == 0)
            break; // end of file
        done += static_cast<size_t>(n);
    }
    return static_cast<int64_t>(done);
}
228
+
229
+ VectorStorage::~VectorStorage()
230
+ {
231
+ close();
232
+ }
233
+
234
+ bool VectorStorage::open(const std::string& path, int dim, StorageDtype dtype, std::string& err)
235
+ {
236
+ close();
237
+ path_ = path;
238
+
239
+ #ifdef _WIN32
240
+ int flags = O_RDWR | O_CREAT | O_BINARY;
241
+ #else
242
+ int flags = O_RDWR | O_CREAT;
243
+ #endif
244
+ fd_ = ::open(path.c_str(), flags, 0644);
245
+ if (fd_ < 0)
246
+ {
247
+ err = std::string("open: ") + strerror(errno);
248
+ return false;
249
+ }
250
+
251
+ struct stat st;
252
+ if (fstat(fd_, &st) != 0)
253
+ {
254
+ err = std::string("fstat: ") + strerror(errno);
255
+ close();
256
+ return false;
257
+ }
258
+ file_size_ = (size_t)st.st_size;
259
+
260
+ if (file_size_ == 0)
261
+ {
262
+ // Create new file with specified dtype
263
+ header_ = {};
264
+ header_.version = 2;
265
+ header_.dim = (uint32_t)dim;
266
+ header_.dtype = (uint32_t)dtype;
267
+ header_.n_rows = 0;
268
+ header_.scale = 1.0f;
269
+ if (!platform::file_truncate(fd_, sizeof(StorageHeader)))
270
+ {
271
+ err = std::string("ftruncate: ") + strerror(errno);
272
+ close();
273
+ return false;
274
+ }
275
+ if (file_pwrite(fd_, &header_, sizeof(header_), 0) != static_cast<int64_t>(sizeof(header_)))
276
+ {
277
+ err = "failed to write header";
278
+ close();
279
+ return false;
280
+ }
281
+ file_size_ = sizeof(StorageHeader);
282
+ }
283
+ else
284
+ {
285
+ if (file_size_ < sizeof(StorageHeader))
286
+ {
287
+ err = "file too small for header";
288
+ close();
289
+ return false;
290
+ }
291
+ if (file_pread(fd_, &header_, sizeof(header_), 0) != static_cast<int64_t>(sizeof(header_)))
292
+ {
293
+ err = "failed to read header";
294
+ close();
295
+ return false;
296
+ }
297
+ if (header_.magic != 0x4C4F474F)
298
+ {
299
+ err = "bad magic";
300
+ close();
301
+ return false;
302
+ }
303
+
304
+ // Handle version compatibility
305
+ if (header_.version == 0 || header_.version == 1)
306
+ {
307
+ // v1 files are float32 only - upgrade to v2 in memory
308
+ header_.version = 2;
309
+ header_.dtype = DTYPE_FLOAT32;
310
+ header_.scale = 1.0f;
311
+ header_.reserved = 0.0f;
312
+ }
313
+ else if (header_.version != 2)
314
+ {
315
+ err = "unsupported file version: " + std::to_string(header_.version);
316
+ close();
317
+ return false;
318
+ }
319
+
320
+ // Check dtype compatibility
321
+ StorageDtype file_dtype = static_cast<StorageDtype>(header_.dtype);
322
+ if (file_dtype != dtype && file_dtype != DTYPE_FLOAT32)
323
+ {
324
+ // Allow opening existing files even if requested dtype differs
325
+ // We'll use the file's dtype
326
+ }
327
+
328
+ if (header_.dim != (uint32_t)dim && dim > 0 && header_.dim > 0)
329
+ {
330
+ err = "dimension mismatch (file=" + std::to_string(header_.dim) +
331
+ " requested=" + std::to_string(dim) + ")";
332
+ close();
333
+ return false;
334
+ }
335
+ if (dim > 0 && header_.dim == 0)
336
+ {
337
+ header_.dim = (uint32_t)dim;
338
+ }
339
+ // Validate n_rows against actual file size to detect corruption.
340
+ size_t expected_size = 0;
341
+ if (!checked_file_size(header_.n_rows, (int)header_.dim, file_dtype, expected_size) ||
342
+ expected_size > file_size_)
343
+ {
344
+ size_t stride = row_stride((int)header_.dim, file_dtype);
345
+ uint64_t max_rows = stride > 0 ? (file_size_ - sizeof(StorageHeader)) / stride : 0;
346
+ header_.n_rows = max_rows;
347
+ }
348
+ }
349
+
350
+ if (!reserve_mapping(file_size_, err))
351
+ {
352
+ close();
353
+ return false;
354
+ }
355
+ return true;
356
+ }
357
+
358
+ void VectorStorage::close()
359
+ {
360
+ unmap();
361
+ if (fd_ >= 0)
362
+ {
363
+ #ifdef _WIN32
364
+ _close(fd_);
365
+ #else
366
+ ::close(fd_);
367
+ #endif
368
+ fd_ = -1;
369
+ }
370
+ header_ = {};
371
+ file_size_ = 0;
372
+ reserved_size_ = 0;
373
+ }
374
+
375
+ uint64_t VectorStorage::append(const float* vec, int dim, std::string& err)
376
+ {
377
+ if (fd_ < 0)
378
+ {
379
+ err = "not open";
380
+ return UINT64_MAX;
381
+ }
382
+ if ((uint32_t)dim != header_.dim)
383
+ {
384
+ err = "dim mismatch on append";
385
+ return UINT64_MAX;
386
+ }
387
+
388
+ StorageDtype dtype = static_cast<StorageDtype>(header_.dtype);
389
+ size_t new_size = 0;
390
+ if (!checked_file_size(header_.n_rows + 1, dim, dtype, new_size))
391
+ {
392
+ err = "storage size overflow";
393
+ return UINT64_MAX;
394
+ }
395
+ size_t stride = row_stride(dim, dtype);
396
+ size_t offset = new_size - stride;
397
+
398
+ if (!platform::file_truncate(fd_, new_size))
399
+ {
400
+ err = std::string("ftruncate: ") + strerror(errno);
401
+ return UINT64_MAX;
402
+ }
403
+
404
+ // Convert and write based on dtype
405
+ if (dtype == DTYPE_FLOAT32)
406
+ {
407
+ if (file_pwrite(fd_, vec, stride, offset) != static_cast<int64_t>(stride))
408
+ {
409
+ err = "pwrite vec failed";
410
+ return UINT64_MAX;
411
+ }
412
+ }
413
+ else if (dtype == DTYPE_FLOAT16)
414
+ {
415
+ std::vector<uint16_t> half(dim);
416
+ for (int i = 0; i < dim; ++i)
417
+ {
418
+ half[i] = float32_to_float16(vec[i]);
419
+ }
420
+ if (file_pwrite(fd_, half.data(), stride, offset) != static_cast<int64_t>(stride))
421
+ {
422
+ err = "pwrite float16 vec failed";
423
+ return UINT64_MAX;
424
+ }
425
+ }
426
+ else if (dtype == DTYPE_INT8)
427
+ {
428
+ // Update global scale if needed
429
+ float vec_scale = compute_int8_scale(vec, dim);
430
+ if (vec_scale > header_.scale)
431
+ {
432
+ header_.scale = vec_scale;
433
+ }
434
+ std::vector<int8_t> quantized(dim);
435
+ quantize_float32_to_int8(vec, quantized.data(), dim, header_.scale);
436
+ if (file_pwrite(fd_, quantized.data(), stride, offset) != static_cast<int64_t>(stride))
437
+ {
438
+ err = "pwrite int8 vec failed";
439
+ return UINT64_MAX;
440
+ }
441
+ }
442
+
443
+ uint64_t id = header_.n_rows;
444
+ header_.n_rows++;
445
+ file_size_ = new_size;
446
+
447
+ if (file_pwrite(fd_, &header_, sizeof(header_), 0) != static_cast<int64_t>(sizeof(header_)))
448
+ {
449
+ err = "pwrite header failed";
450
+ return UINT64_MAX;
451
+ }
452
+
453
+ // Extend mapping if file grew beyond current mapping
454
+ if (!extend_mapping_if_needed(err))
455
+ return UINT64_MAX;
456
+ return id;
457
+ }
458
+
459
+ uint64_t VectorStorage::append_batch(const float* data, int n, int dim, std::string& err)
460
+ {
461
+ if (fd_ < 0)
462
+ {
463
+ err = "not open";
464
+ return UINT64_MAX;
465
+ }
466
+ if (n <= 0)
467
+ {
468
+ return header_.n_rows;
469
+ }
470
+ if ((uint32_t)dim != header_.dim)
471
+ {
472
+ err = "dim mismatch on batch append";
473
+ return UINT64_MAX;
474
+ }
475
+
476
+ StorageDtype dtype = static_cast<StorageDtype>(header_.dtype);
477
+ size_t new_size = 0;
478
+ if (!checked_file_size(header_.n_rows + n, dim, dtype, new_size))
479
+ {
480
+ err = "storage size overflow";
481
+ return UINT64_MAX;
482
+ }
483
+
484
+ // Single ftruncate to final size
485
+ if (!platform::file_truncate(fd_, new_size))
486
+ {
487
+ err = std::string("ftruncate: ") + strerror(errno);
488
+ return UINT64_MAX;
489
+ }
490
+
491
+ size_t stride = row_stride(dim, dtype);
492
+ size_t total_bytes = (size_t)n * stride;
493
+ size_t offset = sizeof(StorageHeader) + header_.n_rows * stride;
494
+
495
+ // Convert and write based on dtype
496
+ if (dtype == DTYPE_FLOAT32)
497
+ {
498
+ if (file_pwrite(fd_, data, total_bytes, offset) != static_cast<int64_t>(total_bytes))
499
+ {
500
+ err = "pwrite batch vec failed";
501
+ return UINT64_MAX;
502
+ }
503
+ }
504
+ else if (dtype == DTYPE_FLOAT16)
505
+ {
506
+ std::vector<uint16_t> half((size_t)n * dim);
507
+ for (int i = 0; i < n * dim; ++i)
508
+ {
509
+ half[i] = float32_to_float16(data[i]);
510
+ }
511
+ if (file_pwrite(fd_, half.data(), total_bytes, offset) != static_cast<int64_t>(total_bytes))
512
+ {
513
+ err = "pwrite batch float16 vec failed";
514
+ return UINT64_MAX;
515
+ }
516
+ }
517
+ else if (dtype == DTYPE_INT8)
518
+ {
519
+ // Compute max scale across batch
520
+ float max_scale = header_.scale;
521
+ for (int i = 0; i < n; ++i)
522
+ {
523
+ float vec_scale = compute_int8_scale(data + i * dim, dim);
524
+ if (vec_scale > max_scale)
525
+ max_scale = vec_scale;
526
+ }
527
+ if (max_scale > header_.scale)
528
+ {
529
+ header_.scale = max_scale;
530
+ }
531
+
532
+ std::vector<int8_t> quantized((size_t)n * dim);
533
+ for (int i = 0; i < n; ++i)
534
+ {
535
+ quantize_float32_to_int8(
536
+ data + i * dim, quantized.data() + i * dim, dim, header_.scale);
537
+ }
538
+ if (file_pwrite(fd_, quantized.data(), total_bytes, offset) !=
539
+ static_cast<int64_t>(total_bytes))
540
+ {
541
+ err = "pwrite batch int8 vec failed";
542
+ return UINT64_MAX;
543
+ }
544
+ }
545
+
546
+ uint64_t start_id = header_.n_rows;
547
+ header_.n_rows += n;
548
+ file_size_ = new_size;
549
+
550
+ // Update header once
551
+ if (file_pwrite(fd_, &header_, sizeof(header_), 0) != static_cast<int64_t>(sizeof(header_)))
552
+ {
553
+ err = "pwrite header failed";
554
+ return UINT64_MAX;
555
+ }
556
+
557
+ // Extend mapping if needed (single remap at end, or just extend if reserved)
558
+ if (!extend_mapping_if_needed(err))
559
+ return UINT64_MAX;
560
+ return start_id;
561
+ }
562
+
563
+ size_t VectorStorage::row_stride_bytes() const
564
+ {
565
+ return row_stride((int)header_.dim, static_cast<StorageDtype>(header_.dtype));
566
+ }
567
+
568
+ const void* VectorStorage::row_raw(uint64_t idx) const
569
+ {
570
+ if (!map_base_ || idx >= header_.n_rows)
571
+ return nullptr;
572
+ size_t stride = row_stride_bytes();
573
+ if (stride > 0 && idx > (SIZE_MAX - sizeof(StorageHeader)) / stride)
574
+ return nullptr;
575
+ size_t offset = sizeof(StorageHeader) + idx * stride;
576
+ if (offset + stride > map_size_)
577
+ return nullptr;
578
+ return map_base_ + offset;
579
+ }
580
+
581
+ void VectorStorage::row_to_float32(uint64_t idx, float* out) const
582
+ {
583
+ const void* raw = row_raw(idx);
584
+ if (!raw)
585
+ return;
586
+
587
+ int dim = (int)header_.dim;
588
+ StorageDtype dtype = static_cast<StorageDtype>(header_.dtype);
589
+
590
+ if (dtype == DTYPE_FLOAT32)
591
+ {
592
+ std::memcpy(out, raw, dim * sizeof(float));
593
+ }
594
+ else if (dtype == DTYPE_FLOAT16)
595
+ {
596
+ const uint16_t* half = static_cast<const uint16_t*>(raw);
597
+ for (int i = 0; i < dim; ++i)
598
+ {
599
+ out[i] = float16_to_float32(half[i]);
600
+ }
601
+ }
602
+ else if (dtype == DTYPE_INT8)
603
+ {
604
+ const int8_t* q = static_cast<const int8_t*>(raw);
605
+ dequantize_int8_to_float32(q, out, dim, header_.scale);
606
+ }
607
+ }
608
+
609
+ const void* VectorStorage::data_raw() const
610
+ {
611
+ if (!map_base_ || header_.n_rows == 0)
612
+ return nullptr;
613
+ return map_base_ + sizeof(StorageHeader);
614
+ }
615
+
616
+ void VectorStorage::data_to_float32(float* out) const
617
+ {
618
+ if (!map_base_ || header_.n_rows == 0)
619
+ return;
620
+
621
+ int dim = (int)header_.dim;
622
+ StorageDtype dtype = static_cast<StorageDtype>(header_.dtype);
623
+ size_t n = header_.n_rows;
624
+
625
+ if (dtype == DTYPE_FLOAT32)
626
+ {
627
+ std::memcpy(out, data_raw(), n * dim * sizeof(float));
628
+ }
629
+ else if (dtype == DTYPE_FLOAT16)
630
+ {
631
+ const uint16_t* half = static_cast<const uint16_t*>(data_raw());
632
+ for (size_t i = 0; i < n * dim; ++i)
633
+ {
634
+ out[i] = float16_to_float32(half[i]);
635
+ }
636
+ }
637
+ else if (dtype == DTYPE_INT8)
638
+ {
639
+ const int8_t* q = static_cast<const int8_t*>(data_raw());
640
+ for (size_t i = 0; i < n; ++i)
641
+ {
642
+ dequantize_int8_to_float32(q + i * dim, out + i * dim, dim, header_.scale);
643
+ }
644
+ }
645
+ }
646
+
647
+ bool VectorStorage::sync(std::string& err)
648
+ {
649
+ if (fd_ < 0)
650
+ {
651
+ err = "not open";
652
+ return false;
653
+ }
654
+ if (platform::file_sync(fd_) != 0)
655
+ {
656
+ err = std::string("fsync: ") + strerror(errno);
657
+ return false;
658
+ }
659
+ return true;
660
+ }
661
+
662
+ bool VectorStorage::reserve_mapping(size_t min_size, std::string& err)
663
+ {
664
+ unmap();
665
+
666
+ // Reserve at least DEFAULT_RESERVE_SIZE or min_size, rounded up
667
+ reserved_size_ = std::max(DEFAULT_RESERVE_SIZE, min_size);
668
+ reserved_size_ = (reserved_size_ + 4095) & ~4095ULL; // Round up to page size
669
+
670
+ #ifdef _WIN32
671
+ // Windows: use platform mapping
672
+ platform::mmap_close(platform_map_);
673
+ if (!platform::mmap_reserve(path_, reserved_size_, platform_map_, err))
674
+ {
675
+ return false;
676
+ }
677
+ map_base_ = platform_map_.data;
678
+ map_size_ = platform::mmap_commit(platform_map_, file_size_);
679
+ #elif defined(__linux__)
680
+ // Linux: use MAP_NORESERVE to reserve address space without committing memory
681
+ map_base_ =
682
+ (uint8_t*)mmap(nullptr, reserved_size_, PROT_READ, MAP_SHARED | MAP_NORESERVE, fd_, 0);
683
+ if (map_base_ == MAP_FAILED)
684
+ {
685
+ map_base_ = nullptr;
686
+ err = std::string("mmap reserve: ") + strerror(errno);
687
+ return false;
688
+ }
689
+ // Advise that we don't need the pages beyond current file size yet
690
+ if (file_size_ < reserved_size_)
691
+ {
692
+ madvise(map_base_ + file_size_, reserved_size_ - file_size_, MADV_DONTNEED);
693
+ }
694
+ map_size_ = file_size_;
695
+ #else
696
+ // macOS and others: fall back to regular mmap of current file size
697
+ // We don't reserve extra space on macOS due to lack of MAP_NORESERVE
698
+ map_base_ = (uint8_t*)mmap(nullptr, file_size_, PROT_READ, MAP_SHARED, fd_, 0);
699
+ if (map_base_ == MAP_FAILED)
700
+ {
701
+ map_base_ = nullptr;
702
+ err = std::string("mmap: ") + strerror(errno);
703
+ return false;
704
+ }
705
+ reserved_size_ = file_size_;
706
+ map_size_ = file_size_;
707
+ #endif
708
+ return true;
709
+ }
710
+
711
+ bool VectorStorage::extend_mapping_if_needed(std::string& err)
712
+ {
713
+ // If file hasn't grown beyond mapped size, nothing to do
714
+ if (file_size_ <= map_size_)
715
+ return true;
716
+
717
+ // If we have enough reserved space, just extend the active mapping
718
+ if (file_size_ <= reserved_size_)
719
+ {
720
+ #ifdef _WIN32
721
+ // Windows: commit more pages
722
+ map_size_ = platform::mmap_commit(platform_map_, file_size_);
723
+ #elif defined(__linux__)
724
+ // Linux: with MAP_SHARED, the mapping automatically covers new file data
725
+ // Just update our tracked size
726
+ map_size_ = file_size_;
727
+ #else
728
+ // macOS: need to remap since we didn't reserve extra space
729
+ if (!remap(err))
730
+ return false;
731
+ #endif
732
+ return true;
733
+ }
734
+
735
+ // Need to grow reservation - do a full remap
736
+ return reserve_mapping(file_size_, err);
737
+ }
738
+
739
+ bool VectorStorage::remap(std::string& err)
740
+ {
741
+ // Try to use reserve_mapping for better performance
742
+ return reserve_mapping(file_size_, err);
743
+ }
744
+
745
+ void VectorStorage::unmap()
746
+ {
747
+ #ifdef _WIN32
748
+ platform::mmap_close(platform_map_);
749
+ map_base_ = nullptr;
750
+ map_size_ = 0;
751
+ reserved_size_ = 0;
752
+ #else
753
+ if (map_base_ && reserved_size_ > 0)
754
+ {
755
+ munmap(map_base_, reserved_size_);
756
+ }
757
+ map_base_ = nullptr;
758
+ map_size_ = 0;
759
+ reserved_size_ = 0;
760
+ #endif
761
+ }
762
+
763
+ } // namespace internal
764
+ } // namespace logosdb