logosdb 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {logosdb-0.2.0 → logosdb-0.2.2}/CHANGELOG +26 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/CMakeLists.txt +7 -3
- {logosdb-0.2.0 → logosdb-0.2.2}/PKG-INFO +2 -2
- {logosdb-0.2.0 → logosdb-0.2.2}/README.md +1 -1
- {logosdb-0.2.0 → logosdb-0.2.2}/include/logosdb/logosdb.h +2 -2
- {logosdb-0.2.0 → logosdb-0.2.2}/pyproject.toml +1 -1
- {logosdb-0.2.0 → logosdb-0.2.2}/src/logosdb.cpp +75 -11
- {logosdb-0.2.0 → logosdb-0.2.2}/src/metadata.cpp +33 -71
- logosdb-0.2.2/src/wal.cpp +333 -0
- logosdb-0.2.2/src/wal.h +93 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/tests/test_basic.cpp +183 -0
- logosdb-0.2.2/third_party/README.md +18 -0
- logosdb-0.2.2/third_party/nlohmann/json.hpp +24765 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/.github/workflows/ci.yml +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/.github/workflows/publish.yml +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/.github/workflows/python.yml +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/.gitignore +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/LICENSE +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/RELEASING.md +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/examples/python/basic_usage.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/examples/python/sentence_transformers_demo.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/python/logosdb/__init__.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/python/logosdb/_core.pyi +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/python/logosdb/py.typed +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/python/src/bindings.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/src/hnsw_index.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/src/hnsw_index.h +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/src/metadata.h +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/src/storage.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/src/storage.h +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/tests/python/test_smoke.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/.github/workflows/build.yml +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/.gitignore +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/ALGO_PARAMS.md +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/CMakeLists.txt +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/LICENSE +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/MANIFEST.in +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/Makefile +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/README.md +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/TESTING_RECALL.md +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/EXAMPLES.md +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/example_epsilon_search.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/example_filter.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/example_mt_filter.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/example_mt_replace_deleted.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/example_mt_search.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/example_multivector_search.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/example_replace_deleted.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/example_search.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/python/EXAMPLES.md +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/python/example.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/python/example_filter.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/python/example_replace_deleted.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/python/example_search.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/python/example_serialization.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/python/pyw_hnswlib.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/hnswlib/bruteforce.h +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/hnswlib/hnswalg.h +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/hnswlib/hnswlib.h +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/hnswlib/space_ip.h +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/hnswlib/space_l2.h +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/hnswlib/stop_condition.h +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/hnswlib/visited_list_pool.h +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/pyproject.toml +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/python_bindings/LazyIndex.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/python_bindings/__init__.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/python_bindings/bindings.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/python_bindings/setup.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/python_bindings/tests/bindings_test_bf_index.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/setup.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/download_bigann.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/epsilon_search_test.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/main.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/multiThreadLoad_test.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/multiThread_replace_test.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/multivector_search_test.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/searchKnnCloserFirst_test.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/searchKnnWithFilter_test.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/sift_1b.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/sift_test.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/update_gen_data.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/updates_test.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_filter.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_getdata.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_labels.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_metadata.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_pickle.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_recall.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_replace.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_resize.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_spaces.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_stress_mt_replace.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/draw_git_test_plots.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/git_tester.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/speedtest.py +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/tools/logosdb-bench.cpp +0 -0
- {logosdb-0.2.0 → logosdb-0.2.2}/tools/logosdb-cli.cpp +0 -0
|
@@ -1,6 +1,32 @@
|
|
|
1
1
|
logosdb change log
|
|
2
2
|
==================
|
|
3
3
|
|
|
4
|
+
0.2.2 (2026-04-20)
|
|
5
|
+
-------------------
|
|
6
|
+
|
|
7
|
+
* Introduced Write-Ahead Log (WAL) for atomic Put operations. Closes #2.
|
|
8
|
+
- New `src/wal.{h,cpp}` implementing `WriteAheadLog` class with binary
|
|
9
|
+
append-only format, state tracking (PENDING/COMMITTED), and replay.
|
|
10
|
+
- `logosdb_put` now writes to WAL first (durability point), then modifies
|
|
11
|
+
vector storage, metadata, and HNSW index. On success, WAL entry is marked
|
|
12
|
+
committed; on crash, pending entries are replayed on next open.
|
|
13
|
+
- `logosdb_open` replays any pending WAL entries before serving requests,
|
|
14
|
+
ensuring consistency across all three stores after partial failures.
|
|
15
|
+
- New `wal.log` file in database directory; backward compatible with
|
|
16
|
+
existing databases (WAL is empty on first open).
|
|
17
|
+
- New test `test_wal_crash_recovery` validates crash recovery behavior.
|
|
18
|
+
|
|
19
|
+
0.2.1 (2026-04-20)
|
|
20
|
+
-------------------
|
|
21
|
+
|
|
22
|
+
* Replaced hand-rolled JSON parser with nlohmann/json (v3.11.3, single-header,
|
|
23
|
+
vendored in third_party/nlohmann/). This fixes multiple parsing edge cases:
|
|
24
|
+
unicode escape sequences (\uXXXX), key ordering independence, extra
|
|
25
|
+
whitespace tolerance, and proper escape handling. Closes #5.
|
|
26
|
+
Added regression tests for unicode, empty strings, complex backslash
|
|
27
|
+
escapes, and key-order variations. All 130 C++ tests pass; total assertions
|
|
28
|
+
up from 115 to 130.
|
|
29
|
+
|
|
4
30
|
0.2.0 (2026-04-17)
|
|
5
31
|
-------------------
|
|
6
32
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
cmake_minimum_required(VERSION 3.15)
|
|
2
|
-
project(logosdb VERSION 0.2.0 LANGUAGES CXX)
|
|
2
|
+
project(logosdb VERSION 0.2.2 LANGUAGES CXX)
|
|
3
3
|
|
|
4
4
|
set(CMAKE_CXX_STANDARD 17)
|
|
5
5
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
|
@@ -8,22 +8,26 @@ option(LOGOSDB_BUILD_TOOLS "Build CLI and benchmark tools" ON)
|
|
|
8
8
|
option(LOGOSDB_BUILD_TESTS "Build unit tests" ON)
|
|
9
9
|
option(LOGOSDB_BUILD_PYTHON "Build Python bindings (pybind11)" OFF)
|
|
10
10
|
|
|
11
|
-
# hnswlib
|
|
11
|
+
# hnswlib and nlohmann/json are header-only
|
|
12
12
|
add_library(hnswlib INTERFACE)
|
|
13
13
|
target_include_directories(hnswlib INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/third_party)
|
|
14
14
|
|
|
15
|
+
add_library(nlohmann_json INTERFACE)
|
|
16
|
+
target_include_directories(nlohmann_json INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/third_party)
|
|
17
|
+
|
|
15
18
|
# Core library
|
|
16
19
|
add_library(logosdb
|
|
17
20
|
src/logosdb.cpp
|
|
18
21
|
src/storage.cpp
|
|
19
22
|
src/metadata.cpp
|
|
20
23
|
src/hnsw_index.cpp
|
|
24
|
+
src/wal.cpp
|
|
21
25
|
)
|
|
22
26
|
target_include_directories(logosdb PUBLIC
|
|
23
27
|
${CMAKE_CURRENT_SOURCE_DIR}/include
|
|
24
28
|
${CMAKE_CURRENT_SOURCE_DIR}/src
|
|
25
29
|
)
|
|
26
|
-
target_link_libraries(logosdb PRIVATE hnswlib)
|
|
30
|
+
target_link_libraries(logosdb PRIVATE hnswlib nlohmann_json)
|
|
27
31
|
|
|
28
32
|
# Position-independent code is required when the static library is linked
|
|
29
33
|
# into a shared object (the Python extension is a .so).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: logosdb
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Fast semantic vector database (HNSW + mmap) with Python bindings
|
|
5
5
|
Keywords: vector-database,hnsw,embeddings,semantic-search,nearest-neighbor
|
|
6
6
|
Author-Email: Jose <jose-compu@users.noreply.github.com>
|
|
@@ -233,7 +233,7 @@ Here is a performance report from the included `logosdb-bench` program. The resu
|
|
|
233
233
|
|
|
234
234
|
We use databases with 1K, 10K, and 100K vectors. Each vector has 2048 dimensions (matching typical LLM embedding sizes). Vectors are L2-normalized random unit vectors.
|
|
235
235
|
|
|
236
|
-
LogosDB: version 0.2.0
|
|
236
|
+
LogosDB: version 0.2.2
|
|
237
237
|
CPU: Apple M-series (ARM64)
|
|
238
238
|
Dim: 2048
|
|
239
239
|
HNSW M: 16, ef_construction: 200, ef_search: 50
|
|
@@ -185,7 +185,7 @@ Here is a performance report from the included `logosdb-bench` program. The resu
|
|
|
185
185
|
|
|
186
186
|
We use databases with 1K, 10K, and 100K vectors. Each vector has 2048 dimensions (matching typical LLM embedding sizes). Vectors are L2-normalized random unit vectors.
|
|
187
187
|
|
|
188
|
-
LogosDB: version 0.2.0
|
|
188
|
+
LogosDB: version 0.2.2
|
|
189
189
|
CPU: Apple M-series (ARM64)
|
|
190
190
|
Dim: 2048
|
|
191
191
|
HNSW M: 16, ef_construction: 200, ef_search: 50
|
|
@@ -6,8 +6,8 @@
|
|
|
6
6
|
|
|
7
7
|
#define LOGOSDB_VERSION_MAJOR 0
|
|
8
8
|
#define LOGOSDB_VERSION_MINOR 2
|
|
9
|
-
#define LOGOSDB_VERSION_PATCH 0
|
|
10
|
-
#define LOGOSDB_VERSION_STRING "0.2.0"
|
|
9
|
+
#define LOGOSDB_VERSION_PATCH 2
|
|
10
|
+
#define LOGOSDB_VERSION_STRING "0.2.2"
|
|
11
11
|
|
|
12
12
|
#ifdef __cplusplus
|
|
13
13
|
extern "C" {
|
|
@@ -2,10 +2,12 @@
|
|
|
2
2
|
#include "storage.h"
|
|
3
3
|
#include "metadata.h"
|
|
4
4
|
#include "hnsw_index.h"
|
|
5
|
+
#include "wal.h"
|
|
5
6
|
|
|
6
7
|
#include <cstdlib>
|
|
7
8
|
#include <cstring>
|
|
8
9
|
#include <filesystem>
|
|
10
|
+
#include <functional>
|
|
9
11
|
#include <mutex>
|
|
10
12
|
#include <string>
|
|
11
13
|
#include <vector>
|
|
@@ -15,11 +17,12 @@ using namespace logosdb::internal;
|
|
|
15
17
|
/* ── Internal DB struct ────────────────────────────────────────────── */
|
|
16
18
|
|
|
17
19
|
struct logosdb_t {
|
|
18
|
-
VectorStorage
|
|
19
|
-
MetadataStore
|
|
20
|
-
HnswIndex
|
|
21
|
-
|
|
22
|
-
|
|
20
|
+
VectorStorage vectors;
|
|
21
|
+
MetadataStore meta;
|
|
22
|
+
HnswIndex index;
|
|
23
|
+
WriteAheadLog wal;
|
|
24
|
+
std::mutex mu;
|
|
25
|
+
int dim = 0;
|
|
23
26
|
};
|
|
24
27
|
|
|
25
28
|
struct logosdb_options_t {
|
|
@@ -80,6 +83,7 @@ logosdb_t * logosdb_open(const char * path, const logosdb_options_t * opts,
|
|
|
80
83
|
std::string vec_path = std::string(path) + "/vectors.bin";
|
|
81
84
|
std::string meta_path = std::string(path) + "/meta.jsonl";
|
|
82
85
|
std::string idx_path = std::string(path) + "/hnsw.idx";
|
|
86
|
+
std::string wal_path = std::string(path) + "/wal.log";
|
|
83
87
|
|
|
84
88
|
if (!db->vectors.open(vec_path, opts->dim, err)) {
|
|
85
89
|
set_err(errptr, err);
|
|
@@ -105,6 +109,48 @@ logosdb_t * logosdb_open(const char * path, const logosdb_options_t * opts,
|
|
|
105
109
|
return nullptr;
|
|
106
110
|
}
|
|
107
111
|
|
|
112
|
+
// Open WAL and replay any pending entries for atomic recovery.
|
|
113
|
+
if (!db->wal.open(wal_path, err)) {
|
|
114
|
+
set_err(errptr, err);
|
|
115
|
+
delete db;
|
|
116
|
+
return nullptr;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
// Replay pending WAL entries to ensure consistency.
|
|
120
|
+
int replayed = db->wal.replay_pending(
|
|
121
|
+
[&db](const WALEntry & entry, std::string & replay_err) -> bool {
|
|
122
|
+
// Validate: expected_id should match current row count
|
|
123
|
+
if (entry.expected_id != db->vectors.n_rows()) {
|
|
124
|
+
replay_err = "wal replay: expected_id mismatch (" +
|
|
125
|
+
std::to_string(entry.expected_id) + " vs " +
|
|
126
|
+
std::to_string(db->vectors.n_rows()) + ")";
|
|
127
|
+
return false;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// Replay vector
|
|
131
|
+
uint64_t vid = db->vectors.append(entry.vector.data(), (int)entry.dim, replay_err);
|
|
132
|
+
if (vid == UINT64_MAX) return false;
|
|
133
|
+
|
|
134
|
+
// Replay metadata
|
|
135
|
+
uint64_t mid = db->meta.append(entry.text.c_str(), entry.timestamp.c_str(), replay_err);
|
|
136
|
+
if (mid == UINT64_MAX) return false;
|
|
137
|
+
|
|
138
|
+
// Replay index
|
|
139
|
+
if (!db->index.add(vid, entry.vector.data(), replay_err)) {
|
|
140
|
+
return false;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
return true;
|
|
144
|
+
},
|
|
145
|
+
err
|
|
146
|
+
);
|
|
147
|
+
|
|
148
|
+
if (replayed < 0) {
|
|
149
|
+
set_err(errptr, "wal replay: " + err);
|
|
150
|
+
delete db;
|
|
151
|
+
return nullptr;
|
|
152
|
+
}
|
|
153
|
+
|
|
108
154
|
// Backfill index if vector storage has more rows than the index (e.g. crash recovery).
|
|
109
155
|
size_t n_vec = db->vectors.n_rows();
|
|
110
156
|
size_t n_idx = db->index.count();
|
|
@@ -144,9 +190,11 @@ logosdb_t * logosdb_open(const char * path, const logosdb_options_t * opts,
|
|
|
144
190
|
void logosdb_close(logosdb_t * db) {
|
|
145
191
|
if (!db) return;
|
|
146
192
|
std::string err;
|
|
193
|
+
db->wal.sync(err); // Ensure WAL is durable before closing other stores
|
|
147
194
|
db->index.save(err);
|
|
148
195
|
db->vectors.sync(err);
|
|
149
196
|
db->meta.sync(err);
|
|
197
|
+
db->wal.close();
|
|
150
198
|
delete db;
|
|
151
199
|
}
|
|
152
200
|
|
|
@@ -169,22 +217,38 @@ uint64_t logosdb_put(logosdb_t * db,
|
|
|
169
217
|
std::lock_guard<std::mutex> lock(db->mu);
|
|
170
218
|
std::string err;
|
|
171
219
|
|
|
172
|
-
//
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
//
|
|
176
|
-
|
|
220
|
+
// Compute expected row id before writing
|
|
221
|
+
uint64_t expected_id = db->vectors.n_rows();
|
|
222
|
+
|
|
223
|
+
// Step 1: Write WAL entry (durability point)
|
|
224
|
+
int64_t wal_offset = db->wal.append_pending(embedding, dim, text, timestamp, expected_id, err);
|
|
225
|
+
if (wal_offset < 0) { set_err(errptr, err); return UINT64_MAX; }
|
|
226
|
+
|
|
227
|
+
// Step 2: Write to vector storage
|
|
177
228
|
uint64_t vid = db->vectors.append(embedding, dim, err);
|
|
178
229
|
if (vid == UINT64_MAX) { set_err(errptr, err); return UINT64_MAX; }
|
|
179
230
|
|
|
231
|
+
// Step 3: Write to metadata storage
|
|
180
232
|
uint64_t mid = db->meta.append(text, timestamp, err);
|
|
181
|
-
if (mid == UINT64_MAX) {
|
|
233
|
+
if (mid == UINT64_MAX) {
|
|
234
|
+
// Metadata write failed - entry remains in WAL for replay on recovery
|
|
235
|
+
set_err(errptr, err);
|
|
236
|
+
return UINT64_MAX;
|
|
237
|
+
}
|
|
182
238
|
|
|
239
|
+
// Step 4: Write to HNSW index
|
|
183
240
|
if (!db->index.add(vid, embedding, err)) {
|
|
241
|
+
// Index write failed - entry remains in WAL for replay on recovery
|
|
184
242
|
set_err(errptr, err);
|
|
185
243
|
return UINT64_MAX;
|
|
186
244
|
}
|
|
187
245
|
|
|
246
|
+
// Step 5: Mark WAL entry as committed
|
|
247
|
+
if (!db->wal.mark_committed(wal_offset, err)) {
|
|
248
|
+
// Non-fatal: entry will be replayed on next open if needed
|
|
249
|
+
// Log but don't fail the operation
|
|
250
|
+
}
|
|
251
|
+
|
|
188
252
|
return vid;
|
|
189
253
|
}
|
|
190
254
|
|
|
@@ -1,65 +1,17 @@
|
|
|
1
1
|
#include "metadata.h"
|
|
2
2
|
|
|
3
|
+
#include <nlohmann/json.hpp>
|
|
4
|
+
|
|
3
5
|
#include <cerrno>
|
|
4
6
|
#include <cstring>
|
|
5
7
|
#include <fstream>
|
|
6
|
-
#include <sstream>
|
|
7
8
|
#include <fcntl.h>
|
|
8
9
|
#include <unistd.h>
|
|
9
10
|
|
|
10
11
|
namespace logosdb {
|
|
11
12
|
namespace internal {
|
|
12
13
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
static std::string json_escape(const std::string & s) {
|
|
16
|
-
std::string out;
|
|
17
|
-
out.reserve(s.size() + 8);
|
|
18
|
-
for (char c : s) {
|
|
19
|
-
switch (c) {
|
|
20
|
-
case '"': out += "\\\""; break;
|
|
21
|
-
case '\\': out += "\\\\"; break;
|
|
22
|
-
case '\n': out += "\\n"; break;
|
|
23
|
-
case '\r': out += "\\r"; break;
|
|
24
|
-
case '\t': out += "\\t"; break;
|
|
25
|
-
default: out += c;
|
|
26
|
-
}
|
|
27
|
-
}
|
|
28
|
-
return out;
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
static std::string json_unescape(const std::string & s) {
|
|
32
|
-
std::string out;
|
|
33
|
-
out.reserve(s.size());
|
|
34
|
-
for (size_t i = 0; i < s.size(); ++i) {
|
|
35
|
-
if (s[i] == '\\' && i + 1 < s.size()) {
|
|
36
|
-
switch (s[i + 1]) {
|
|
37
|
-
case '"': out += '"'; ++i; break;
|
|
38
|
-
case '\\': out += '\\'; ++i; break;
|
|
39
|
-
case 'n': out += '\n'; ++i; break;
|
|
40
|
-
case 'r': out += '\r'; ++i; break;
|
|
41
|
-
case 't': out += '\t'; ++i; break;
|
|
42
|
-
default: out += s[i];
|
|
43
|
-
}
|
|
44
|
-
} else {
|
|
45
|
-
out += s[i];
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
return out;
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
static std::string extract_field(const std::string & line, const std::string & key) {
|
|
52
|
-
std::string needle = "\"" + key + "\":\"";
|
|
53
|
-
auto pos = line.find(needle);
|
|
54
|
-
if (pos == std::string::npos) return "";
|
|
55
|
-
pos += needle.size();
|
|
56
|
-
auto end = line.find('"', pos);
|
|
57
|
-
while (end != std::string::npos && end > 0 && line[end - 1] == '\\') {
|
|
58
|
-
end = line.find('"', end + 1);
|
|
59
|
-
}
|
|
60
|
-
if (end == std::string::npos) return "";
|
|
61
|
-
return json_unescape(line.substr(pos, end - pos));
|
|
62
|
-
}
|
|
14
|
+
using json = nlohmann::json;
|
|
63
15
|
|
|
64
16
|
MetadataStore::~MetadataStore() { close(); }
|
|
65
17
|
|
|
@@ -78,29 +30,34 @@ bool MetadataStore::open(const std::string & path, std::string & err) {
|
|
|
78
30
|
std::string line;
|
|
79
31
|
while (std::getline(in, line)) {
|
|
80
32
|
if (line.empty()) continue;
|
|
33
|
+
|
|
34
|
+
// Try to parse as JSON
|
|
35
|
+
json j;
|
|
36
|
+
try {
|
|
37
|
+
j = json::parse(line);
|
|
38
|
+
} catch (const json::exception & e) {
|
|
39
|
+
// Invalid JSON line - skip but don't fail
|
|
40
|
+
continue;
|
|
41
|
+
}
|
|
42
|
+
|
|
81
43
|
// Tombstone record: {"op":"del","id":N}
|
|
82
|
-
if (
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
if (pos == std::string::npos) continue;
|
|
86
|
-
pos += key.size();
|
|
87
|
-
uint64_t id = 0;
|
|
88
|
-
bool got_digit = false;
|
|
89
|
-
while (pos < line.size() && (line[pos] == ' ' || line[pos] == '\t')) ++pos;
|
|
90
|
-
while (pos < line.size() && line[pos] >= '0' && line[pos] <= '9') {
|
|
91
|
-
id = id * 10 + (uint64_t)(line[pos] - '0');
|
|
92
|
-
got_digit = true;
|
|
93
|
-
++pos;
|
|
94
|
-
}
|
|
95
|
-
if (got_digit && id < rows_.size() && !rows_[id].deleted) {
|
|
44
|
+
if (j.contains("op") && j["op"] == "del" && j.contains("id")) {
|
|
45
|
+
uint64_t id = j["id"].get<uint64_t>();
|
|
46
|
+
if (id < rows_.size() && !rows_[id].deleted) {
|
|
96
47
|
rows_[id].deleted = true;
|
|
97
48
|
++num_deleted_;
|
|
98
49
|
}
|
|
99
50
|
continue;
|
|
100
51
|
}
|
|
52
|
+
|
|
53
|
+
// Data row: {"text":"...","ts":"..."}
|
|
101
54
|
MetaRow r;
|
|
102
|
-
|
|
103
|
-
|
|
55
|
+
if (j.contains("text")) {
|
|
56
|
+
r.text = j["text"].get<std::string>();
|
|
57
|
+
}
|
|
58
|
+
if (j.contains("ts")) {
|
|
59
|
+
r.timestamp = j["ts"].get<std::string>();
|
|
60
|
+
}
|
|
104
61
|
rows_.push_back(std::move(r));
|
|
105
62
|
}
|
|
106
63
|
}
|
|
@@ -121,9 +78,10 @@ uint64_t MetadataStore::append(const char * text, const char * timestamp,
|
|
|
121
78
|
std::string & err) {
|
|
122
79
|
if (fd_ < 0) { err = "meta not open"; return UINT64_MAX; }
|
|
123
80
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
81
|
+
json j;
|
|
82
|
+
j["text"] = text ? text : "";
|
|
83
|
+
j["ts"] = timestamp ? timestamp : "";
|
|
84
|
+
std::string line = j.dump() + "\n";
|
|
127
85
|
|
|
128
86
|
ssize_t written = ::write(fd_, line.data(), line.size());
|
|
129
87
|
if (written != (ssize_t)line.size()) {
|
|
@@ -147,7 +105,11 @@ bool MetadataStore::mark_deleted(uint64_t id, std::string & err) {
|
|
|
147
105
|
return false;
|
|
148
106
|
}
|
|
149
107
|
|
|
150
|
-
|
|
108
|
+
json j;
|
|
109
|
+
j["op"] = "del";
|
|
110
|
+
j["id"] = id;
|
|
111
|
+
std::string line = j.dump() + "\n";
|
|
112
|
+
|
|
151
113
|
ssize_t written = ::write(fd_, line.data(), line.size());
|
|
152
114
|
if (written != (ssize_t)line.size()) {
|
|
153
115
|
err = std::string("write tombstone: ") + strerror(errno);
|