logosdb 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. {logosdb-0.2.0 → logosdb-0.2.2}/CHANGELOG +26 -0
  2. {logosdb-0.2.0 → logosdb-0.2.2}/CMakeLists.txt +7 -3
  3. {logosdb-0.2.0 → logosdb-0.2.2}/PKG-INFO +2 -2
  4. {logosdb-0.2.0 → logosdb-0.2.2}/README.md +1 -1
  5. {logosdb-0.2.0 → logosdb-0.2.2}/include/logosdb/logosdb.h +2 -2
  6. {logosdb-0.2.0 → logosdb-0.2.2}/pyproject.toml +1 -1
  7. {logosdb-0.2.0 → logosdb-0.2.2}/src/logosdb.cpp +75 -11
  8. {logosdb-0.2.0 → logosdb-0.2.2}/src/metadata.cpp +33 -71
  9. logosdb-0.2.2/src/wal.cpp +333 -0
  10. logosdb-0.2.2/src/wal.h +93 -0
  11. {logosdb-0.2.0 → logosdb-0.2.2}/tests/test_basic.cpp +183 -0
  12. logosdb-0.2.2/third_party/README.md +18 -0
  13. logosdb-0.2.2/third_party/nlohmann/json.hpp +24765 -0
  14. {logosdb-0.2.0 → logosdb-0.2.2}/.github/workflows/ci.yml +0 -0
  15. {logosdb-0.2.0 → logosdb-0.2.2}/.github/workflows/publish.yml +0 -0
  16. {logosdb-0.2.0 → logosdb-0.2.2}/.github/workflows/python.yml +0 -0
  17. {logosdb-0.2.0 → logosdb-0.2.2}/.gitignore +0 -0
  18. {logosdb-0.2.0 → logosdb-0.2.2}/LICENSE +0 -0
  19. {logosdb-0.2.0 → logosdb-0.2.2}/RELEASING.md +0 -0
  20. {logosdb-0.2.0 → logosdb-0.2.2}/examples/python/basic_usage.py +0 -0
  21. {logosdb-0.2.0 → logosdb-0.2.2}/examples/python/sentence_transformers_demo.py +0 -0
  22. {logosdb-0.2.0 → logosdb-0.2.2}/python/logosdb/__init__.py +0 -0
  23. {logosdb-0.2.0 → logosdb-0.2.2}/python/logosdb/_core.pyi +0 -0
  24. {logosdb-0.2.0 → logosdb-0.2.2}/python/logosdb/py.typed +0 -0
  25. {logosdb-0.2.0 → logosdb-0.2.2}/python/src/bindings.cpp +0 -0
  26. {logosdb-0.2.0 → logosdb-0.2.2}/src/hnsw_index.cpp +0 -0
  27. {logosdb-0.2.0 → logosdb-0.2.2}/src/hnsw_index.h +0 -0
  28. {logosdb-0.2.0 → logosdb-0.2.2}/src/metadata.h +0 -0
  29. {logosdb-0.2.0 → logosdb-0.2.2}/src/storage.cpp +0 -0
  30. {logosdb-0.2.0 → logosdb-0.2.2}/src/storage.h +0 -0
  31. {logosdb-0.2.0 → logosdb-0.2.2}/tests/python/test_smoke.py +0 -0
  32. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/.github/workflows/build.yml +0 -0
  33. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/.gitignore +0 -0
  34. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/ALGO_PARAMS.md +0 -0
  35. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/CMakeLists.txt +0 -0
  36. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/LICENSE +0 -0
  37. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/MANIFEST.in +0 -0
  38. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/Makefile +0 -0
  39. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/README.md +0 -0
  40. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/TESTING_RECALL.md +0 -0
  41. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/EXAMPLES.md +0 -0
  42. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/example_epsilon_search.cpp +0 -0
  43. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/example_filter.cpp +0 -0
  44. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/example_mt_filter.cpp +0 -0
  45. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/example_mt_replace_deleted.cpp +0 -0
  46. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/example_mt_search.cpp +0 -0
  47. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/example_multivector_search.cpp +0 -0
  48. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/example_replace_deleted.cpp +0 -0
  49. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/cpp/example_search.cpp +0 -0
  50. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/python/EXAMPLES.md +0 -0
  51. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/python/example.py +0 -0
  52. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/python/example_filter.py +0 -0
  53. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/python/example_replace_deleted.py +0 -0
  54. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/python/example_search.py +0 -0
  55. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/python/example_serialization.py +0 -0
  56. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/examples/python/pyw_hnswlib.py +0 -0
  57. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/hnswlib/bruteforce.h +0 -0
  58. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/hnswlib/hnswalg.h +0 -0
  59. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/hnswlib/hnswlib.h +0 -0
  60. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/hnswlib/space_ip.h +0 -0
  61. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/hnswlib/space_l2.h +0 -0
  62. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/hnswlib/stop_condition.h +0 -0
  63. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/hnswlib/visited_list_pool.h +0 -0
  64. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/pyproject.toml +0 -0
  65. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/python_bindings/LazyIndex.py +0 -0
  66. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/python_bindings/__init__.py +0 -0
  67. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/python_bindings/bindings.cpp +0 -0
  68. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/python_bindings/setup.py +0 -0
  69. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/python_bindings/tests/bindings_test_bf_index.py +0 -0
  70. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/setup.py +0 -0
  71. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/download_bigann.py +0 -0
  72. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/epsilon_search_test.cpp +0 -0
  73. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/main.cpp +0 -0
  74. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/multiThreadLoad_test.cpp +0 -0
  75. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/multiThread_replace_test.cpp +0 -0
  76. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/multivector_search_test.cpp +0 -0
  77. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/searchKnnCloserFirst_test.cpp +0 -0
  78. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/searchKnnWithFilter_test.cpp +0 -0
  79. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/sift_1b.cpp +0 -0
  80. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/sift_test.cpp +0 -0
  81. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/update_gen_data.py +0 -0
  82. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/cpp/updates_test.cpp +0 -0
  83. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test.py +0 -0
  84. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_filter.py +0 -0
  85. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_getdata.py +0 -0
  86. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_labels.py +0 -0
  87. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_metadata.py +0 -0
  88. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_pickle.py +0 -0
  89. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_recall.py +0 -0
  90. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_replace.py +0 -0
  91. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_resize.py +0 -0
  92. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_spaces.py +0 -0
  93. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/bindings_test_stress_mt_replace.py +0 -0
  94. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/draw_git_test_plots.py +0 -0
  95. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/git_tester.py +0 -0
  96. {logosdb-0.2.0 → logosdb-0.2.2}/third_party/hnswlib/tests/python/speedtest.py +0 -0
  97. {logosdb-0.2.0 → logosdb-0.2.2}/tools/logosdb-bench.cpp +0 -0
  98. {logosdb-0.2.0 → logosdb-0.2.2}/tools/logosdb-cli.cpp +0 -0
@@ -1,6 +1,32 @@
1
1
  logosdb change log
2
2
  ==================
3
3
 
4
+ 0.2.2 (2026-04-20)
5
+ -------------------
6
+
7
+ * Introduced Write-Ahead Log (WAL) for atomic Put operations. Closes #2.
8
+ - New `src/wal.{h,cpp}` implementing `WriteAheadLog` class with binary
9
+ append-only format, state tracking (PENDING/COMMITTED), and replay.
10
+ - `logosdb_put` now writes to WAL first (durability point), then modifies
11
+ vector storage, metadata, and HNSW index. On success, WAL entry is marked
12
+ committed; on crash, pending entries are replayed on next open.
13
+ - `logosdb_open` replays any pending WAL entries before serving requests,
14
+ ensuring consistency across all three stores after partial failures.
15
+ - New `wal.log` file in database directory; backward compatible with
16
+ existing databases (WAL is empty on first open).
17
+ - New test `test_wal_crash_recovery` validates crash recovery behavior.
18
+
19
+ 0.2.1 (2026-04-20)
20
+ -------------------
21
+
22
+ * Replaced hand-rolled JSON parser with nlohmann/json (v3.11.3, single-header,
23
+ vendored in third_party/nlohmann/). This fixes multiple parsing edge cases:
24
+ unicode escape sequences (\uXXXX), key ordering independence, extra
25
+ whitespace tolerance, and proper escape handling. Closes #5.
26
+ Added regression tests for unicode, empty strings, complex backslash
27
+ escapes, and key-order variations. All C++ tests pass; total assertions
28
+ up from 115 to 130.
29
+
4
30
  0.2.0 (2026-04-17)
5
31
  -------------------
6
32
 
@@ -1,5 +1,5 @@
1
1
  cmake_minimum_required(VERSION 3.15)
2
- project(logosdb VERSION 0.2.0 LANGUAGES CXX)
2
+ project(logosdb VERSION 0.2.2 LANGUAGES CXX)
3
3
 
4
4
  set(CMAKE_CXX_STANDARD 17)
5
5
  set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -8,22 +8,26 @@ option(LOGOSDB_BUILD_TOOLS "Build CLI and benchmark tools" ON)
8
8
  option(LOGOSDB_BUILD_TESTS "Build unit tests" ON)
9
9
  option(LOGOSDB_BUILD_PYTHON "Build Python bindings (pybind11)" OFF)
10
10
 
11
- # hnswlib is header-only
11
+ # hnswlib and nlohmann/json are header-only
12
12
  add_library(hnswlib INTERFACE)
13
13
  target_include_directories(hnswlib INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/third_party)
14
14
 
15
+ add_library(nlohmann_json INTERFACE)
16
+ target_include_directories(nlohmann_json INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/third_party)
17
+
15
18
  # Core library
16
19
  add_library(logosdb
17
20
  src/logosdb.cpp
18
21
  src/storage.cpp
19
22
  src/metadata.cpp
20
23
  src/hnsw_index.cpp
24
+ src/wal.cpp
21
25
  )
22
26
  target_include_directories(logosdb PUBLIC
23
27
  ${CMAKE_CURRENT_SOURCE_DIR}/include
24
28
  ${CMAKE_CURRENT_SOURCE_DIR}/src
25
29
  )
26
- target_link_libraries(logosdb PRIVATE hnswlib)
30
+ target_link_libraries(logosdb PRIVATE hnswlib nlohmann_json)
27
31
 
28
32
  # Position-independent code is required when the static library is linked
29
33
  # into a shared object (the Python extension is a .so).
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: logosdb
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Fast semantic vector database (HNSW + mmap) with Python bindings
5
5
  Keywords: vector-database,hnsw,embeddings,semantic-search,nearest-neighbor
6
6
  Author-Email: Jose <jose-compu@users.noreply.github.com>
@@ -233,7 +233,7 @@ Here is a performance report from the included `logosdb-bench` program. The resu
233
233
 
234
234
  We use databases with 1K, 10K, and 100K vectors. Each vector has 2048 dimensions (matching typical LLM embedding sizes). Vectors are L2-normalized random unit vectors.
235
235
 
236
- LogosDB: version 0.2.0
236
+ LogosDB: version 0.2.2
237
237
  CPU: Apple M-series (ARM64)
238
238
  Dim: 2048
239
239
  HNSW M: 16, ef_construction: 200, ef_search: 50
@@ -185,7 +185,7 @@ Here is a performance report from the included `logosdb-bench` program. The resu
185
185
 
186
186
  We use databases with 1K, 10K, and 100K vectors. Each vector has 2048 dimensions (matching typical LLM embedding sizes). Vectors are L2-normalized random unit vectors.
187
187
 
188
- LogosDB: version 0.2.0
188
+ LogosDB: version 0.2.2
189
189
  CPU: Apple M-series (ARM64)
190
190
  Dim: 2048
191
191
  HNSW M: 16, ef_construction: 200, ef_search: 50
@@ -6,8 +6,8 @@
6
6
 
7
7
  #define LOGOSDB_VERSION_MAJOR 0
8
8
  #define LOGOSDB_VERSION_MINOR 2
9
- #define LOGOSDB_VERSION_PATCH 0
10
- #define LOGOSDB_VERSION_STRING "0.2.0"
9
+ #define LOGOSDB_VERSION_PATCH 2
10
+ #define LOGOSDB_VERSION_STRING "0.2.2"
11
11
 
12
12
  #ifdef __cplusplus
13
13
  extern "C" {
@@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build"
4
4
 
5
5
  [project]
6
6
  name = "logosdb"
7
- version = "0.2.0"
7
+ version = "0.2.2"
8
8
  description = "Fast semantic vector database (HNSW + mmap) with Python bindings"
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -2,10 +2,12 @@
2
2
  #include "storage.h"
3
3
  #include "metadata.h"
4
4
  #include "hnsw_index.h"
5
+ #include "wal.h"
5
6
 
6
7
  #include <cstdlib>
7
8
  #include <cstring>
8
9
  #include <filesystem>
10
+ #include <functional>
9
11
  #include <mutex>
10
12
  #include <string>
11
13
  #include <vector>
@@ -15,11 +17,12 @@ using namespace logosdb::internal;
15
17
  /* ── Internal DB struct ────────────────────────────────────────────── */
16
18
 
17
19
  struct logosdb_t {
18
- VectorStorage vectors;
19
- MetadataStore meta;
20
- HnswIndex index;
21
- std::mutex mu;
22
- int dim = 0;
20
+ VectorStorage vectors;
21
+ MetadataStore meta;
22
+ HnswIndex index;
23
+ WriteAheadLog wal;
24
+ std::mutex mu;
25
+ int dim = 0;
23
26
  };
24
27
 
25
28
  struct logosdb_options_t {
@@ -80,6 +83,7 @@ logosdb_t * logosdb_open(const char * path, const logosdb_options_t * opts,
80
83
  std::string vec_path = std::string(path) + "/vectors.bin";
81
84
  std::string meta_path = std::string(path) + "/meta.jsonl";
82
85
  std::string idx_path = std::string(path) + "/hnsw.idx";
86
+ std::string wal_path = std::string(path) + "/wal.log";
83
87
 
84
88
  if (!db->vectors.open(vec_path, opts->dim, err)) {
85
89
  set_err(errptr, err);
@@ -105,6 +109,48 @@ logosdb_t * logosdb_open(const char * path, const logosdb_options_t * opts,
105
109
  return nullptr;
106
110
  }
107
111
 
112
+ // Open WAL and replay any pending entries for atomic recovery.
113
+ if (!db->wal.open(wal_path, err)) {
114
+ set_err(errptr, err);
115
+ delete db;
116
+ return nullptr;
117
+ }
118
+
119
+ // Replay pending WAL entries to ensure consistency.
120
+ int replayed = db->wal.replay_pending(
121
+ [&db](const WALEntry & entry, std::string & replay_err) -> bool {
122
+ // Validate: expected_id should match current row count
123
+ if (entry.expected_id != db->vectors.n_rows()) {
124
+ replay_err = "wal replay: expected_id mismatch (" +
125
+ std::to_string(entry.expected_id) + " vs " +
126
+ std::to_string(db->vectors.n_rows()) + ")";
127
+ return false;
128
+ }
129
+
130
+ // Replay vector
131
+ uint64_t vid = db->vectors.append(entry.vector.data(), (int)entry.dim, replay_err);
132
+ if (vid == UINT64_MAX) return false;
133
+
134
+ // Replay metadata
135
+ uint64_t mid = db->meta.append(entry.text.c_str(), entry.timestamp.c_str(), replay_err);
136
+ if (mid == UINT64_MAX) return false;
137
+
138
+ // Replay index
139
+ if (!db->index.add(vid, entry.vector.data(), replay_err)) {
140
+ return false;
141
+ }
142
+
143
+ return true;
144
+ },
145
+ err
146
+ );
147
+
148
+ if (replayed < 0) {
149
+ set_err(errptr, "wal replay: " + err);
150
+ delete db;
151
+ return nullptr;
152
+ }
153
+
108
154
  // Backfill index if vector storage has more rows than the index (e.g. crash recovery).
109
155
  size_t n_vec = db->vectors.n_rows();
110
156
  size_t n_idx = db->index.count();
@@ -144,9 +190,11 @@ logosdb_t * logosdb_open(const char * path, const logosdb_options_t * opts,
144
190
  void logosdb_close(logosdb_t * db) {
145
191
  if (!db) return;
146
192
  std::string err;
193
+ db->wal.sync(err); // Ensure WAL is durable before closing other stores
147
194
  db->index.save(err);
148
195
  db->vectors.sync(err);
149
196
  db->meta.sync(err);
197
+ db->wal.close();
150
198
  delete db;
151
199
  }
152
200
 
@@ -169,22 +217,38 @@ uint64_t logosdb_put(logosdb_t * db,
169
217
  std::lock_guard<std::mutex> lock(db->mu);
170
218
  std::string err;
171
219
 
172
- // NOTE: these three writes are not atomic. On partial failure the stores
173
- // may diverge (e.g. vectors written but metadata missing). The HNSW index
174
- // is backfilled from the vector store on next open, but metadata gaps are
175
- // not currently recoverable. A WAL would fix this; acceptable for now
176
- // given the single-process embedded use case.
220
+ // Compute expected row id before writing
221
+ uint64_t expected_id = db->vectors.n_rows();
222
+
223
+ // Step 1: Write WAL entry (durability point)
224
+ int64_t wal_offset = db->wal.append_pending(embedding, dim, text, timestamp, expected_id, err);
225
+ if (wal_offset < 0) { set_err(errptr, err); return UINT64_MAX; }
226
+
227
+ // Step 2: Write to vector storage
177
228
  uint64_t vid = db->vectors.append(embedding, dim, err);
178
229
  if (vid == UINT64_MAX) { set_err(errptr, err); return UINT64_MAX; }
179
230
 
231
+ // Step 3: Write to metadata storage
180
232
  uint64_t mid = db->meta.append(text, timestamp, err);
181
- if (mid == UINT64_MAX) { set_err(errptr, err); return UINT64_MAX; }
233
+ if (mid == UINT64_MAX) {
234
+ // Metadata write failed - entry remains in WAL for replay on recovery
235
+ set_err(errptr, err);
236
+ return UINT64_MAX;
237
+ }
182
238
 
239
+ // Step 4: Write to HNSW index
183
240
  if (!db->index.add(vid, embedding, err)) {
241
+ // Index write failed - entry remains in WAL for replay on recovery
184
242
  set_err(errptr, err);
185
243
  return UINT64_MAX;
186
244
  }
187
245
 
246
+ // Step 5: Mark WAL entry as committed
247
+ if (!db->wal.mark_committed(wal_offset, err)) {
248
+ // Non-fatal: entry will be replayed on next open if needed
249
+ // Log but don't fail the operation
250
+ }
251
+
188
252
  return vid;
189
253
  }
190
254
 
@@ -1,65 +1,17 @@
1
1
  #include "metadata.h"
2
2
 
3
+ #include <nlohmann/json.hpp>
4
+
3
5
  #include <cerrno>
4
6
  #include <cstring>
5
7
  #include <fstream>
6
- #include <sstream>
7
8
  #include <fcntl.h>
8
9
  #include <unistd.h>
9
10
 
10
11
  namespace logosdb {
11
12
  namespace internal {
12
13
 
13
- // Minimal JSON helpers — no external dependency.
14
-
15
- static std::string json_escape(const std::string & s) {
16
- std::string out;
17
- out.reserve(s.size() + 8);
18
- for (char c : s) {
19
- switch (c) {
20
- case '"': out += "\\\""; break;
21
- case '\\': out += "\\\\"; break;
22
- case '\n': out += "\\n"; break;
23
- case '\r': out += "\\r"; break;
24
- case '\t': out += "\\t"; break;
25
- default: out += c;
26
- }
27
- }
28
- return out;
29
- }
30
-
31
- static std::string json_unescape(const std::string & s) {
32
- std::string out;
33
- out.reserve(s.size());
34
- for (size_t i = 0; i < s.size(); ++i) {
35
- if (s[i] == '\\' && i + 1 < s.size()) {
36
- switch (s[i + 1]) {
37
- case '"': out += '"'; ++i; break;
38
- case '\\': out += '\\'; ++i; break;
39
- case 'n': out += '\n'; ++i; break;
40
- case 'r': out += '\r'; ++i; break;
41
- case 't': out += '\t'; ++i; break;
42
- default: out += s[i];
43
- }
44
- } else {
45
- out += s[i];
46
- }
47
- }
48
- return out;
49
- }
50
-
51
- static std::string extract_field(const std::string & line, const std::string & key) {
52
- std::string needle = "\"" + key + "\":\"";
53
- auto pos = line.find(needle);
54
- if (pos == std::string::npos) return "";
55
- pos += needle.size();
56
- auto end = line.find('"', pos);
57
- while (end != std::string::npos && end > 0 && line[end - 1] == '\\') {
58
- end = line.find('"', end + 1);
59
- }
60
- if (end == std::string::npos) return "";
61
- return json_unescape(line.substr(pos, end - pos));
62
- }
14
+ using json = nlohmann::json;
63
15
 
64
16
  MetadataStore::~MetadataStore() { close(); }
65
17
 
@@ -78,29 +30,34 @@ bool MetadataStore::open(const std::string & path, std::string & err) {
78
30
  std::string line;
79
31
  while (std::getline(in, line)) {
80
32
  if (line.empty()) continue;
33
+
34
+ // Try to parse as JSON
35
+ json j;
36
+ try {
37
+ j = json::parse(line);
38
+ } catch (const json::exception & e) {
39
+ // Invalid JSON line - skip but don't fail
40
+ continue;
41
+ }
42
+
81
43
  // Tombstone record: {"op":"del","id":N}
82
- if (line.find("\"op\":\"del\"") != std::string::npos) {
83
- auto key = std::string("\"id\":");
84
- auto pos = line.find(key);
85
- if (pos == std::string::npos) continue;
86
- pos += key.size();
87
- uint64_t id = 0;
88
- bool got_digit = false;
89
- while (pos < line.size() && (line[pos] == ' ' || line[pos] == '\t')) ++pos;
90
- while (pos < line.size() && line[pos] >= '0' && line[pos] <= '9') {
91
- id = id * 10 + (uint64_t)(line[pos] - '0');
92
- got_digit = true;
93
- ++pos;
94
- }
95
- if (got_digit && id < rows_.size() && !rows_[id].deleted) {
44
+ if (j.contains("op") && j["op"] == "del" && j.contains("id")) {
45
+ uint64_t id = j["id"].get<uint64_t>();
46
+ if (id < rows_.size() && !rows_[id].deleted) {
96
47
  rows_[id].deleted = true;
97
48
  ++num_deleted_;
98
49
  }
99
50
  continue;
100
51
  }
52
+
53
+ // Data row: {"text":"...","ts":"..."}
101
54
  MetaRow r;
102
- r.text = extract_field(line, "text");
103
- r.timestamp = extract_field(line, "ts");
55
+ if (j.contains("text")) {
56
+ r.text = j["text"].get<std::string>();
57
+ }
58
+ if (j.contains("ts")) {
59
+ r.timestamp = j["ts"].get<std::string>();
60
+ }
104
61
  rows_.push_back(std::move(r));
105
62
  }
106
63
  }
@@ -121,9 +78,10 @@ uint64_t MetadataStore::append(const char * text, const char * timestamp,
121
78
  std::string & err) {
122
79
  if (fd_ < 0) { err = "meta not open"; return UINT64_MAX; }
123
80
 
124
- std::string t = text ? json_escape(text) : "";
125
- std::string ts = timestamp ? json_escape(timestamp) : "";
126
- std::string line = "{\"text\":\"" + t + "\",\"ts\":\"" + ts + "\"}\n";
81
+ json j;
82
+ j["text"] = text ? text : "";
83
+ j["ts"] = timestamp ? timestamp : "";
84
+ std::string line = j.dump() + "\n";
127
85
 
128
86
  ssize_t written = ::write(fd_, line.data(), line.size());
129
87
  if (written != (ssize_t)line.size()) {
@@ -147,7 +105,11 @@ bool MetadataStore::mark_deleted(uint64_t id, std::string & err) {
147
105
  return false;
148
106
  }
149
107
 
150
- std::string line = "{\"op\":\"del\",\"id\":" + std::to_string(id) + "}\n";
108
+ json j;
109
+ j["op"] = "del";
110
+ j["id"] = id;
111
+ std::string line = j.dump() + "\n";
112
+
151
113
  ssize_t written = ::write(fd_, line.data(), line.size());
152
114
  if (written != (ssize_t)line.size()) {
153
115
  err = std::string("write tombstone: ") + strerror(errno);