logosdb 0.7.8 → 0.7.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,266 @@
1
+ #include "metadata.h"
2
+
3
+ #include <fcntl.h>
4
+ #include <sys/types.h>
5
+
6
+ #include <cerrno>
7
+ #include <cstring>
8
+ #include <fstream>
9
+ #include <nlohmann/json.hpp>
10
+
11
+ #ifdef _WIN32
12
+ #include <io.h>
13
+ #else
14
+ #include <unistd.h>
15
+ #endif
16
+
17
+ namespace logosdb
18
+ {
19
+ namespace internal
20
+ {
21
+
22
+ using json = nlohmann::json;
23
+
24
+ MetadataStore::~MetadataStore()
25
+ {
26
+ close();
27
+ }
28
+
29
+ bool MetadataStore::open(const std::string& path, std::string& err)
30
+ {
31
+ close();
32
+ path_ = path;
33
+
34
+ #ifdef _WIN32
35
+ int flags = O_RDWR | O_CREAT | O_APPEND | O_BINARY;
36
+ #else
37
+ int flags = O_RDWR | O_CREAT | O_APPEND;
38
+ #endif
39
+ fd_ = ::open(path.c_str(), flags, 0644);
40
+ if (fd_ < 0)
41
+ {
42
+ err = std::string("open meta: ") + strerror(errno);
43
+ return false;
44
+ }
45
+
46
+ std::ifstream in(path);
47
+ if (in.good())
48
+ {
49
+ std::string line;
50
+ while (std::getline(in, line))
51
+ {
52
+ if (line.empty())
53
+ continue;
54
+
55
+ // Try to parse as JSON
56
+ json j;
57
+ try
58
+ {
59
+ j = json::parse(line);
60
+ }
61
+ catch (const json::exception& e)
62
+ {
63
+ // Invalid JSON line - skip but don't fail
64
+ continue;
65
+ }
66
+
67
+ // Tombstone record: {"op":"del","id":N}
68
+ if (j.contains("op") && j["op"] == "del" && j.contains("id"))
69
+ {
70
+ uint64_t id = j["id"].get<uint64_t>();
71
+ if (id < rows_.size() && !rows_[id].deleted)
72
+ {
73
+ rows_[id].deleted = true;
74
+ ++num_deleted_;
75
+ }
76
+ continue;
77
+ }
78
+
79
+ // Data row: {"text":"...","ts":"..."}
80
+ MetaRow r;
81
+ if (j.contains("text"))
82
+ {
83
+ r.text = j["text"].get<std::string>();
84
+ }
85
+ if (j.contains("ts"))
86
+ {
87
+ r.timestamp = j["ts"].get<std::string>();
88
+ }
89
+ rows_.push_back(std::move(r));
90
+ }
91
+ }
92
+ return true;
93
+ }
94
+
95
+ void MetadataStore::close()
96
+ {
97
+ if (fd_ >= 0)
98
+ {
99
+ #ifdef _WIN32
100
+ _close(fd_);
101
+ #else
102
+ ::close(fd_);
103
+ #endif
104
+ fd_ = -1;
105
+ }
106
+ rows_.clear();
107
+ num_deleted_ = 0;
108
+ path_.clear();
109
+ }
110
+
111
+ uint64_t MetadataStore::append(const char* text, const char* timestamp, std::string& err)
112
+ {
113
+ if (fd_ < 0)
114
+ {
115
+ err = "meta not open";
116
+ return UINT64_MAX;
117
+ }
118
+
119
+ json j;
120
+ j["text"] = text ? text : "";
121
+ j["ts"] = timestamp ? timestamp : "";
122
+ std::string line = j.dump() + "\n";
123
+
124
+ #ifdef _WIN32
125
+ int written = _write(fd_, line.data(), (int)line.size());
126
+ if (written != (int)line.size())
127
+ {
128
+ #else
129
+ ssize_t written = ::write(fd_, line.data(), line.size());
130
+ if (written != (ssize_t)line.size())
131
+ {
132
+ #endif
133
+ err = std::string("write meta: ") + strerror(errno);
134
+ return UINT64_MAX;
135
+ }
136
+
137
+ uint64_t id = rows_.size();
138
+ rows_.push_back({text ? text : "", timestamp ? timestamp : ""});
139
+ return id;
140
+ }
141
+
142
+ uint64_t MetadataStore::append_batch(const char* const* texts,
143
+ const char* const* timestamps,
144
+ int n,
145
+ std::string& err)
146
+ {
147
+ if (fd_ < 0)
148
+ {
149
+ err = "meta not open";
150
+ return UINT64_MAX;
151
+ }
152
+ if (n <= 0)
153
+ {
154
+ return rows_.size();
155
+ }
156
+
157
+ // Build all JSON lines and write in a single batch
158
+ std::string batch;
159
+ batch.reserve(n * 64); // rough estimate
160
+
161
+ for (int i = 0; i < n; ++i)
162
+ {
163
+ json j;
164
+ j["text"] = texts && texts[i] ? texts[i] : "";
165
+ j["ts"] = timestamps && timestamps[i] ? timestamps[i] : "";
166
+ batch += j.dump();
167
+ batch += "\n";
168
+ }
169
+
170
+ #ifdef _WIN32
171
+ int written = _write(fd_, batch.data(), (int)batch.size());
172
+ if (written != (int)batch.size())
173
+ {
174
+ #else
175
+ ssize_t written = ::write(fd_, batch.data(), batch.size());
176
+ if (written != (ssize_t)batch.size())
177
+ {
178
+ #endif
179
+ err = std::string("write meta batch: ") + strerror(errno);
180
+ return UINT64_MAX;
181
+ }
182
+
183
+ uint64_t start_id = rows_.size();
184
+ for (int i = 0; i < n; ++i)
185
+ {
186
+ rows_.push_back(
187
+ {texts && texts[i] ? texts[i] : "", timestamps && timestamps[i] ? timestamps[i] : ""});
188
+ }
189
+ return start_id;
190
+ }
191
+
192
+ bool MetadataStore::mark_deleted(uint64_t id, std::string& err)
193
+ {
194
+ if (fd_ < 0)
195
+ {
196
+ err = "meta not open";
197
+ return false;
198
+ }
199
+ if (id >= rows_.size())
200
+ {
201
+ err = "delete: id out of range";
202
+ return false;
203
+ }
204
+ if (rows_[id].deleted)
205
+ {
206
+ err = "delete: id already deleted";
207
+ return false;
208
+ }
209
+
210
+ json j;
211
+ j["op"] = "del";
212
+ j["id"] = id;
213
+ std::string line = j.dump() + "\n";
214
+
215
+ #ifdef _WIN32
216
+ int written = _write(fd_, line.data(), (int)line.size());
217
+ if (written != (int)line.size())
218
+ {
219
+ #else
220
+ ssize_t written = ::write(fd_, line.data(), line.size());
221
+ if (written != (ssize_t)line.size())
222
+ {
223
+ #endif
224
+ err = std::string("write tombstone: ") + strerror(errno);
225
+ return false;
226
+ }
227
+
228
+ rows_[id].deleted = true;
229
+ ++num_deleted_;
230
+ return true;
231
+ }
232
+
233
+ std::vector<uint64_t> MetadataStore::deleted_ids() const
234
+ {
235
+ std::vector<uint64_t> out;
236
+ out.reserve(num_deleted_);
237
+ for (size_t i = 0; i < rows_.size(); ++i)
238
+ {
239
+ if (rows_[i].deleted)
240
+ out.push_back((uint64_t)i);
241
+ }
242
+ return out;
243
+ }
244
+
245
+ bool MetadataStore::sync(std::string& err)
246
+ {
247
+ if (fd_ < 0)
248
+ {
249
+ err = "meta not open";
250
+ return false;
251
+ }
252
+ #ifdef _WIN32
253
+ if (_commit(fd_) != 0)
254
+ {
255
+ #else
256
+ if (::fsync(fd_) != 0)
257
+ {
258
+ #endif
259
+ err = std::string("fsync meta: ") + strerror(errno);
260
+ return false;
261
+ }
262
+ return true;
263
+ }
264
+
265
+ } // namespace internal
266
+ } // namespace logosdb
@@ -0,0 +1,69 @@
1
+ #pragma once
2
+
3
+ #include <cstdint>
4
+ #include <string>
5
+ #include <vector>
6
+
7
+ namespace logosdb
8
+ {
9
+ namespace internal
10
+ {
11
+
12
// One metadata record as held in memory by MetadataStore.
struct MetaRow
{
    std::string text;      // payload text of the row
    std::string timestamp; // timestamp string stored alongside the text
    bool deleted{false};   // true once the row has been tombstoned
};
18
+
19
+ // Append-only JSONL metadata store.
20
+ //
21
+ // Data rows: {"text":"...","ts":"..."}
22
+ // Tombstone rows: {"op":"del","id":N}
23
+ //
24
+ // Tombstones mark an earlier data row as logically deleted. They do not
25
+ // occupy a row index themselves.
26
+ class MetadataStore
27
+ {
28
+ public:
29
+ MetadataStore() = default;
30
+ ~MetadataStore();
31
+
32
+ MetadataStore(const MetadataStore&) = delete;
33
+ MetadataStore& operator=(const MetadataStore&) = delete;
34
+
35
+ bool open(const std::string& path, std::string& err);
36
+ void close();
37
+
38
+ uint64_t append(const char* text, const char* timestamp, std::string& err);
39
+
40
+ /* Append n metadata rows efficiently. Returns the starting id, or UINT64_MAX on error. */
41
+ uint64_t
42
+ append_batch(const char* const* texts, const char* const* timestamps, int n, std::string& err);
43
+
44
+ // Append a tombstone for `id`. Returns false if the id is out of range
45
+ // or already deleted. `err` is set on failure.
46
+ bool mark_deleted(uint64_t id, std::string& err);
47
+
48
+ size_t count() const { return rows_.size(); }
49
+ size_t deleted_count() const { return num_deleted_; }
50
+
51
+ bool is_deleted(uint64_t id) const { return id < rows_.size() && rows_[id].deleted; }
52
+
53
+ const MetaRow* row(uint64_t idx) const { return idx < rows_.size() ? &rows_[idx] : nullptr; }
54
+
55
+ // Iterate over all currently-tombstoned ids. Used by the DB on open to
56
+ // re-apply deletion marks to a freshly-rebuilt HNSW index.
57
+ std::vector<uint64_t> deleted_ids() const;
58
+
59
+ bool sync(std::string& err);
60
+
61
+ private:
62
+ std::string path_;
63
+ int fd_ = -1;
64
+ std::vector<MetaRow> rows_;
65
+ size_t num_deleted_ = 0;
66
+ };
67
+
68
+ } // namespace internal
69
+ } // namespace logosdb
@@ -0,0 +1,342 @@
1
+ #include "platform.h"
2
+
3
+ #include <fcntl.h>
4
+ #include <sys/stat.h>
5
+ #include <sys/types.h>
6
+
7
+ #include <cerrno>
8
+ #include <cstring>
9
+
10
+ #ifdef _WIN32
11
+ #include <io.h>
12
+ #else
13
+ #include <unistd.h>
14
+ #endif
15
+
16
+ namespace logosdb
17
+ {
18
+ namespace internal
19
+ {
20
+ namespace platform
21
+ {
22
+
23
// Reports whether `path` can be stat'ed, i.e. whether something exists at
// that path and is reachable by the calling process.
bool file_exists(const std::string& path)
{
    const char* raw_path = path.c_str();
#ifdef _WIN32
    struct _stat info;
    const int rc = _stat(raw_path, &info);
#else
    struct stat info;
    const int rc = stat(raw_path, &info);
#endif
    return rc == 0;
}
33
+
34
+ #ifdef _WIN32
35
+ // Windows memory mapping implementation
36
+
37
+ bool mmap_open(const std::string& path, size_t& out_size, MappedFile& out_map, std::string& err)
38
+ {
39
+ out_map = {}; // Clear
40
+
41
+ HANDLE file_handle = CreateFileA(path.c_str(),
42
+ GENERIC_READ,
43
+ FILE_SHARE_READ,
44
+ nullptr,
45
+ OPEN_EXISTING,
46
+ FILE_ATTRIBUTE_NORMAL,
47
+ nullptr);
48
+
49
+ if (file_handle == INVALID_HANDLE_VALUE)
50
+ {
51
+ err = "CreateFileA failed: " + std::to_string(GetLastError());
52
+ return false;
53
+ }
54
+
55
+ LARGE_INTEGER file_size;
56
+ if (!GetFileSizeEx(file_handle, &file_size))
57
+ {
58
+ err = "GetFileSizeEx failed: " + std::to_string(GetLastError());
59
+ CloseHandle(file_handle);
60
+ return false;
61
+ }
62
+
63
+ if (file_size.QuadPart == 0)
64
+ {
65
+ // Empty file - no mapping needed
66
+ out_map.file_handle = file_handle;
67
+ out_map.data = nullptr;
68
+ out_map.size = 0;
69
+ out_size = 0;
70
+ return true;
71
+ }
72
+
73
+ HANDLE map_handle = CreateFileMapping(file_handle, nullptr, PAGE_READONLY, 0, 0, nullptr);
74
+
75
+ if (map_handle == INVALID_HANDLE_VALUE)
76
+ {
77
+ err = "CreateFileMapping failed: " + std::to_string(GetLastError());
78
+ CloseHandle(file_handle);
79
+ return false;
80
+ }
81
+
82
+ void* data = MapViewOfFile(map_handle, FILE_MAP_READ, 0, 0, 0);
83
+
84
+ if (!data)
85
+ {
86
+ err = "MapViewOfFile failed: " + std::to_string(GetLastError());
87
+ CloseHandle(map_handle);
88
+ CloseHandle(file_handle);
89
+ return false;
90
+ }
91
+
92
+ out_map.file_handle = file_handle;
93
+ out_map.map_handle = map_handle;
94
+ out_map.data = static_cast<uint8_t*>(data);
95
+ out_map.size = static_cast<size_t>(file_size.QuadPart);
96
+ out_size = out_map.size;
97
+ return true;
98
+ }
99
+
100
+ void mmap_close(MappedFile& map)
101
+ {
102
+ if (map.data)
103
+ {
104
+ UnmapViewOfFile(map.data);
105
+ map.data = nullptr;
106
+ }
107
+ if (map.map_handle != INVALID_HANDLE_VALUE)
108
+ {
109
+ CloseHandle(map.map_handle);
110
+ map.map_handle = INVALID_HANDLE_VALUE;
111
+ }
112
+ if (map.file_handle != INVALID_HANDLE_VALUE)
113
+ {
114
+ CloseHandle(map.file_handle);
115
+ map.file_handle = INVALID_HANDLE_VALUE;
116
+ }
117
+ map.size = 0;
118
+ }
119
+
120
+ bool mmap_resize(MappedFile& map, size_t new_size, std::string& err)
121
+ {
122
+ // On Windows, we need to close and reopen the mapping
123
+ // This is used when the file grows
124
+ mmap_close(map);
125
+
126
+ // For simplicity on Windows, we don't remap here - the caller
127
+ // should use mmap_open again after closing the file
128
+ err = "mmap_resize not implemented for Windows - use close/reopen pattern";
129
+ return false;
130
+ }
131
+
132
+ bool mmap_reserve(const std::string& path,
133
+ size_t reserve_size,
134
+ MappedFile& out_map,
135
+ std::string& err)
136
+ {
137
+ out_map = {}; // Clear
138
+
139
+ // First, get the current file size
140
+ HANDLE file_handle =
141
+ CreateFileA(path.c_str(),
142
+ GENERIC_READ,
143
+ FILE_SHARE_READ | FILE_SHARE_WRITE, // Allow others to write (file growth)
144
+ nullptr,
145
+ OPEN_EXISTING,
146
+ FILE_ATTRIBUTE_NORMAL,
147
+ nullptr);
148
+
149
+ if (file_handle == INVALID_HANDLE_VALUE)
150
+ {
151
+ err = "CreateFileA failed: " + std::to_string(GetLastError());
152
+ return false;
153
+ }
154
+
155
+ LARGE_INTEGER file_size;
156
+ if (!GetFileSizeEx(file_handle, &file_size))
157
+ {
158
+ err = "GetFileSizeEx failed: " + std::to_string(GetLastError());
159
+ CloseHandle(file_handle);
160
+ return false;
161
+ }
162
+
163
+ // Create a file mapping that reserves virtual address space
164
+ // We use a large maximum size for reservation, but only commit what's needed
165
+ HANDLE map_handle =
166
+ CreateFileMapping(file_handle,
167
+ nullptr,
168
+ PAGE_READONLY,
169
+ 0,
170
+ static_cast<DWORD>(reserve_size), // Maximum size for reservation
171
+ nullptr);
172
+
173
+ if (map_handle == INVALID_HANDLE_VALUE)
174
+ {
175
+ err = "CreateFileMapping failed: " + std::to_string(GetLastError());
176
+ CloseHandle(file_handle);
177
+ return false;
178
+ }
179
+
180
+ // Map only the current file size initially
181
+ void* data = MapViewOfFile(map_handle,
182
+ FILE_MAP_READ,
183
+ 0,
184
+ 0,
185
+ static_cast<size_t>(file_size.QuadPart) // Initial view size
186
+ );
187
+
188
+ if (!data)
189
+ {
190
+ err = "MapViewOfFile failed: " + std::to_string(GetLastError());
191
+ CloseHandle(map_handle);
192
+ CloseHandle(file_handle);
193
+ return false;
194
+ }
195
+
196
+ out_map.file_handle = file_handle;
197
+ out_map.map_handle = map_handle;
198
+ out_map.data = static_cast<uint8_t*>(data);
199
+ out_map.size = static_cast<size_t>(file_size.QuadPart);
200
+ return true;
201
+ }
202
+
203
+ size_t mmap_commit(MappedFile& map, size_t file_size)
204
+ {
205
+ #ifdef _WIN32
206
+ // On Windows with file mapping, the view automatically extends
207
+ // when the file grows (as long as we're within the mapping's max size)
208
+ // We just need to unmap and remap to the new size
209
+ if (map.data)
210
+ {
211
+ UnmapViewOfFile(map.data);
212
+ }
213
+
214
+ void* data = MapViewOfFile(map.map_handle,
215
+ FILE_MAP_READ,
216
+ 0,
217
+ 0,
218
+ file_size // New view size
219
+ );
220
+
221
+ if (!data)
222
+ {
223
+ return map.size; // Failed, keep old size
224
+ }
225
+
226
+ map.data = static_cast<uint8_t*>(data);
227
+ map.size = file_size;
228
+ return map.size;
229
+ #else
230
+ (void)map;
231
+ return file_size;
232
+ #endif
233
+ }
234
+
235
+ #else
236
+ // POSIX memory mapping implementation (Linux/macOS)
237
+
238
+ bool mmap_open(const std::string& path, size_t& out_size, MappedFile& out_map, std::string& err)
239
+ {
240
+ out_map = {}; // Clear
241
+
242
+ int fd = ::open(path.c_str(), O_RDONLY);
243
+ if (fd < 0)
244
+ {
245
+ err = std::string("open: ") + strerror(errno);
246
+ return false;
247
+ }
248
+
249
+ struct stat st;
250
+ if (fstat(fd, &st) != 0)
251
+ {
252
+ err = std::string("fstat: ") + strerror(errno);
253
+ ::close(fd);
254
+ return false;
255
+ }
256
+
257
+ size_t size = static_cast<size_t>(st.st_size);
258
+
259
+ if (size == 0)
260
+ {
261
+ // Empty file - no mapping needed
262
+ out_map.fd = fd;
263
+ out_map.data = nullptr;
264
+ out_map.size = 0;
265
+ out_size = 0;
266
+ return true;
267
+ }
268
+
269
+ void* data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd, 0);
270
+ if (data == MAP_FAILED)
271
+ {
272
+ err = std::string("mmap: ") + strerror(errno);
273
+ ::close(fd);
274
+ return false;
275
+ }
276
+
277
+ out_map.fd = fd;
278
+ out_map.data = static_cast<uint8_t*>(data);
279
+ out_map.size = size;
280
+ out_size = size;
281
+ return true;
282
+ }
283
+
284
+ void mmap_close(MappedFile& map)
285
+ {
286
+ if (map.data && map.size > 0)
287
+ {
288
+ munmap(map.data, map.size);
289
+ }
290
+ if (map.fd >= 0)
291
+ {
292
+ ::close(map.fd);
293
+ }
294
+ map.data = nullptr;
295
+ map.fd = -1;
296
+ map.size = 0;
297
+ }
298
+
299
+ bool mmap_resize(MappedFile& map, size_t new_size, std::string& err)
300
+ {
301
+ // On POSIX, unmap and remap
302
+ if (map.data && map.size > 0)
303
+ {
304
+ munmap(map.data, map.size);
305
+ }
306
+
307
+ void* data = mmap(nullptr, new_size, PROT_READ, MAP_SHARED, map.fd, 0);
308
+ if (data == MAP_FAILED)
309
+ {
310
+ err = std::string("mmap resize: ") + strerror(errno);
311
+ return false;
312
+ }
313
+
314
+ map.data = static_cast<uint8_t*>(data);
315
+ map.size = new_size;
316
+ return true;
317
+ }
318
+
319
+ // Stub implementations for POSIX - reservation is handled directly in storage.cpp
320
+ bool mmap_reserve(const std::string& path,
321
+ size_t reserve_size,
322
+ MappedFile& out_map,
323
+ std::string& err)
324
+ {
325
+ (void)path;
326
+ (void)reserve_size;
327
+ (void)out_map;
328
+ err = "mmap_reserve not implemented for POSIX - use storage.cpp implementation";
329
+ return false;
330
+ }
331
+
332
+ size_t mmap_commit(MappedFile& map, size_t file_size)
333
+ {
334
+ (void)map;
335
+ return file_size;
336
+ }
337
+
338
+ #endif
339
+
340
+ } // namespace platform
341
+ } // namespace internal
342
+ } // namespace logosdb