logosdb 0.7.8 → 0.7.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,781 @@
1
#include "hnsw_index.h"
#include "metadata.h"
#include "platform.h"
#include "storage.h"
#include "wal.h"

#include <logosdb/logosdb.h>

#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <functional>
#include <memory>
#include <mutex>
#include <string>
#include <system_error>
#include <vector>
16
+
17
+ using namespace logosdb::internal;
18
+
19
+ /* ── Internal DB struct ────────────────────────────────────────────── */
20
+
21
+ struct logosdb_t
22
+ {
23
+ VectorStorage vectors;
24
+ MetadataStore meta;
25
+ HnswIndex index;
26
+ WriteAheadLog wal;
27
+ std::mutex mu;
28
+ int dim = 0;
29
+ };
30
+
31
/*
 * Concrete definition behind the opaque `logosdb_options_t` handle.
 *
 * Every field except `dim` has a usable default; logosdb_open rejects options
 * whose `dim` is still 0.
 */
struct logosdb_options_t
{
    int    dim{0};                /* must be set to a positive value before opening */
    size_t max_elements{1000000}; /* forwarded to HnswParams::max_elements */
    int    ef_construction{200};
    int    M{16};
    int    ef_search{50};
    int    distance{0};           /* LOGOSDB_DIST_IP default */
    int    dtype{0};              /* LOGOSDB_DTYPE_FLOAT32 default */
};
41
+
42
/*
 * Concrete definition behind the opaque `logosdb_search_result_t` handle:
 * an owned list of hits in the order the search produced them.
 */
struct logosdb_search_result_t
{
    /* One search hit. Numeric members are zero-initialised so that a
     * default-constructed Hit never carries indeterminate values. */
    struct Hit
    {
        uint64_t    id = 0;       // row id (index label)
        float       score = 0.0f; // score as reported by the index
        std::string text;         // copied from metadata; empty when unavailable
        std::string timestamp;    // copied from metadata; empty when unavailable
    };
    std::vector<Hit> hits;
};
53
+
54
+ /* ── Helpers ───────────────────────────────────────────────────────── */
55
+
56
+ static void set_err(char** errptr, const std::string& msg)
57
+ {
58
+ if (errptr)
59
+ {
60
+ *errptr = platform::string_duplicate(msg.c_str());
61
+ }
62
+ }
63
+
64
+ /* ── Options ───────────────────────────────────────────────────────── */
65
+
66
+ logosdb_options_t* logosdb_options_create(void)
67
+ {
68
+ return new logosdb_options_t();
69
+ }
70
+
71
+ void logosdb_options_destroy(logosdb_options_t* opts)
72
+ {
73
+ delete opts;
74
+ }
75
+
76
+ void logosdb_options_set_dim(logosdb_options_t* o, int d)
77
+ {
78
+ if (o && d > 0)
79
+ o->dim = d;
80
+ }
81
+ void logosdb_options_set_max_elements(logosdb_options_t* o, size_t n)
82
+ {
83
+ if (o && n > 0)
84
+ o->max_elements = n;
85
+ }
86
+ void logosdb_options_set_ef_construction(logosdb_options_t* o, int e)
87
+ {
88
+ if (o && e > 0)
89
+ o->ef_construction = e;
90
+ }
91
+ void logosdb_options_set_M(logosdb_options_t* o, int m)
92
+ {
93
+ if (o && m > 0)
94
+ o->M = m;
95
+ }
96
+ void logosdb_options_set_ef_search(logosdb_options_t* o, int e)
97
+ {
98
+ if (o && e > 0)
99
+ o->ef_search = e;
100
+ }
101
+
102
+ int logosdb_options_set_distance(logosdb_options_t* o, int metric)
103
+ {
104
+ if (!o)
105
+ return -1;
106
+ if (metric < 0 || metric > 2)
107
+ return -1; /* Invalid metric */
108
+ o->distance = metric;
109
+ return 0;
110
+ }
111
+
112
+ int logosdb_options_set_dtype(logosdb_options_t* o, int dtype)
113
+ {
114
+ if (!o)
115
+ return -1;
116
+ if (dtype < 0 || dtype > 2)
117
+ return -1; /* Invalid dtype */
118
+ o->dtype = dtype;
119
+ return 0;
120
+ }
121
+
122
+ /* ── Lifecycle ─────────────────────────────────────────────────────── */
123
+
124
+ logosdb_t* logosdb_open(const char* path, const logosdb_options_t* opts, char** errptr)
125
+ {
126
+ if (!path || !opts || opts->dim <= 0)
127
+ {
128
+ set_err(errptr, "invalid arguments: path and dim > 0 required");
129
+ return nullptr;
130
+ }
131
+
132
+ std::filesystem::create_directories(path);
133
+
134
+ auto db = new logosdb_t();
135
+ db->dim = opts->dim;
136
+ std::string err;
137
+
138
+ std::string vec_path = std::string(path) + "/vectors.bin";
139
+ std::string meta_path = std::string(path) + "/meta.jsonl";
140
+ std::string idx_path = std::string(path) + "/hnsw.idx";
141
+ std::string wal_path = std::string(path) + "/wal.log";
142
+
143
+ StorageDtype dtype = static_cast<StorageDtype>(opts->dtype);
144
+ if (!db->vectors.open(vec_path, opts->dim, dtype, err))
145
+ {
146
+ set_err(errptr, err);
147
+ delete db;
148
+ return nullptr;
149
+ }
150
+ if (!db->meta.open(meta_path, err))
151
+ {
152
+ set_err(errptr, err);
153
+ delete db;
154
+ return nullptr;
155
+ }
156
+
157
+ HnswParams hp;
158
+ hp.dim = opts->dim;
159
+ hp.max_elements = opts->max_elements;
160
+ hp.ef_construction = opts->ef_construction;
161
+ hp.M = opts->M;
162
+ hp.ef_search = opts->ef_search;
163
+ hp.distance = opts->distance;
164
+
165
+ if (!db->index.open(idx_path, hp, err))
166
+ {
167
+ set_err(errptr, err);
168
+ delete db;
169
+ return nullptr;
170
+ }
171
+
172
+ // Open WAL and replay any pending entries for atomic recovery.
173
+ if (!db->wal.open(wal_path, err))
174
+ {
175
+ set_err(errptr, err);
176
+ delete db;
177
+ return nullptr;
178
+ }
179
+
180
+ // Replay pending WAL entries to ensure consistency.
181
+ int replayed = db->wal.replay_pending(
182
+ [&db](const WALEntry& entry, std::string& replay_err) -> bool
183
+ {
184
+ // Validate: expected_id should match current row count
185
+ if (entry.expected_id != db->vectors.n_rows())
186
+ {
187
+ replay_err = "wal replay: expected_id mismatch (" +
188
+ std::to_string(entry.expected_id) + " vs " +
189
+ std::to_string(db->vectors.n_rows()) + ")";
190
+ return false;
191
+ }
192
+
193
+ // Replay vector
194
+ uint64_t vid = db->vectors.append(entry.vector.data(), (int)entry.dim, replay_err);
195
+ if (vid == UINT64_MAX)
196
+ return false;
197
+
198
+ // Replay metadata
199
+ uint64_t mid = db->meta.append(entry.text.c_str(), entry.timestamp.c_str(), replay_err);
200
+ if (mid == UINT64_MAX)
201
+ return false;
202
+
203
+ // Replay index
204
+ if (!db->index.add(vid, entry.vector.data(), replay_err))
205
+ {
206
+ return false;
207
+ }
208
+
209
+ return true;
210
+ },
211
+ err);
212
+
213
+ if (replayed < 0)
214
+ {
215
+ set_err(errptr, "wal replay: " + err);
216
+ delete db;
217
+ return nullptr;
218
+ }
219
+
220
+ // Backfill index if vector storage has more rows than the index (e.g. crash recovery).
221
+ size_t n_vec = db->vectors.n_rows();
222
+ size_t n_idx = db->index.count();
223
+ bool backfilled = false;
224
+ if (n_vec > n_idx)
225
+ {
226
+ std::vector<float> float32_row(db->dim);
227
+ for (size_t i = n_idx; i < n_vec; ++i)
228
+ {
229
+ db->vectors.row_to_float32(i, float32_row.data());
230
+ if (!db->index.add(i, float32_row.data(), err))
231
+ {
232
+ set_err(errptr, "backfill index: " + err);
233
+ delete db;
234
+ return nullptr;
235
+ }
236
+ }
237
+ backfilled = true;
238
+ }
239
+
240
+ // Re-apply deletion marks. If we just rebuilt (part of) the index from the
241
+ // vector store, or if the index file was out of sync with the metadata
242
+ // tombstone log, we may have live slots for rows the metadata log says are
243
+ // deleted. Replay the tombstone set onto the index (skip entries already
244
+ // marked to tolerate normal reopens).
245
+ for (uint64_t id : db->meta.deleted_ids())
246
+ {
247
+ if (!db->index.has_label(id))
248
+ continue;
249
+ if (db->index.is_deleted(id))
250
+ continue;
251
+ if (!db->index.mark_deleted(id, err))
252
+ {
253
+ set_err(errptr, "replay tombstone: " + err);
254
+ delete db;
255
+ return nullptr;
256
+ }
257
+ }
258
+
259
+ if (backfilled)
260
+ db->index.save(err);
261
+
262
+ return db;
263
+ }
264
+
265
+ void logosdb_close(logosdb_t* db)
266
+ {
267
+ if (!db)
268
+ return;
269
+ std::string err;
270
+ db->wal.sync(err); // Ensure WAL is durable before closing other stores
271
+ db->index.save(err);
272
+ db->vectors.sync(err);
273
+ db->meta.sync(err);
274
+ db->wal.close();
275
+ delete db;
276
+ }
277
+
278
+ /* ── Write ─────────────────────────────────────────────────────────── */
279
+
280
+ uint64_t logosdb_put(logosdb_t* db,
281
+ const float* embedding,
282
+ int dim,
283
+ const char* text,
284
+ const char* timestamp,
285
+ char** errptr)
286
+ {
287
+ if (!db || !embedding)
288
+ {
289
+ set_err(errptr, "null db or embedding");
290
+ return UINT64_MAX;
291
+ }
292
+ if (dim != db->dim)
293
+ {
294
+ set_err(errptr, "dimension mismatch");
295
+ return UINT64_MAX;
296
+ }
297
+
298
+ std::lock_guard<std::mutex> lock(db->mu);
299
+ std::string err;
300
+
301
+ // Compute expected row id before writing
302
+ uint64_t expected_id = db->vectors.n_rows();
303
+
304
+ // Step 1: Write WAL entry (durability point)
305
+ int64_t wal_offset = db->wal.append_pending(embedding, dim, text, timestamp, expected_id, err);
306
+ if (wal_offset < 0)
307
+ {
308
+ set_err(errptr, err);
309
+ return UINT64_MAX;
310
+ }
311
+
312
+ // Step 2: Write to vector storage
313
+ uint64_t vid = db->vectors.append(embedding, dim, err);
314
+ if (vid == UINT64_MAX)
315
+ {
316
+ set_err(errptr, err);
317
+ return UINT64_MAX;
318
+ }
319
+
320
+ // Step 3: Write to metadata storage
321
+ uint64_t mid = db->meta.append(text, timestamp, err);
322
+ if (mid == UINT64_MAX)
323
+ {
324
+ // Metadata write failed - entry remains in WAL for replay on recovery
325
+ set_err(errptr, err);
326
+ return UINT64_MAX;
327
+ }
328
+
329
+ // Step 4: Write to HNSW index
330
+ if (!db->index.add(vid, embedding, err))
331
+ {
332
+ // Index write failed - entry remains in WAL for replay on recovery
333
+ set_err(errptr, err);
334
+ return UINT64_MAX;
335
+ }
336
+
337
+ // Step 5: Mark WAL entry as committed
338
+ if (!db->wal.mark_committed(wal_offset, err))
339
+ {
340
+ // Non-fatal: entry will be replayed on next open if needed
341
+ // Log but don't fail the operation
342
+ }
343
+
344
+ return vid;
345
+ }
346
+
347
+ int logosdb_put_batch(logosdb_t* db,
348
+ const float* embeddings,
349
+ int n,
350
+ int dim,
351
+ const char* const* texts,
352
+ const char* const* timestamps,
353
+ uint64_t* out_ids,
354
+ char** errptr)
355
+ {
356
+ if (!db || !embeddings || !out_ids)
357
+ {
358
+ set_err(errptr, "null db, embeddings, or out_ids");
359
+ return -1;
360
+ }
361
+ if (n <= 0)
362
+ {
363
+ return 0; // Empty batch is a no-op success
364
+ }
365
+ if (dim != db->dim)
366
+ {
367
+ set_err(errptr, "dimension mismatch");
368
+ return -1;
369
+ }
370
+
371
+ std::lock_guard<std::mutex> lock(db->mu);
372
+ std::string err;
373
+ uint64_t start_id = db->vectors.n_rows();
374
+
375
+ // For batch operations, we write a single WAL entry for the entire batch
376
+ // to minimize WAL overhead. The entry contains the expected starting id.
377
+
378
+ // Step 1: Batch write to vector storage (single ftruncate + pwrite)
379
+ uint64_t vid = db->vectors.append_batch(embeddings, n, dim, err);
380
+ if (vid == UINT64_MAX)
381
+ {
382
+ set_err(errptr, err);
383
+ return -1;
384
+ }
385
+
386
+ // Step 2: Batch write to metadata storage
387
+ uint64_t mid = db->meta.append_batch(texts, timestamps, n, err);
388
+ if (mid == UINT64_MAX)
389
+ {
390
+ set_err(errptr, err);
391
+ return -1;
392
+ }
393
+
394
+ // Step 3: Add to HNSW index (must be done per-vector)
395
+ const float* vec = embeddings;
396
+ size_t stride = (size_t)dim * sizeof(float);
397
+ for (int i = 0; i < n; ++i)
398
+ {
399
+ if (!db->index.add(vid + i, vec, err))
400
+ {
401
+ set_err(errptr, err);
402
+ return -1;
403
+ }
404
+ vec = reinterpret_cast<const float*>(reinterpret_cast<const uint8_t*>(vec) + stride);
405
+ }
406
+
407
+ // Fill out_ids with assigned ids
408
+ for (int i = 0; i < n; ++i)
409
+ {
410
+ out_ids[i] = vid + i;
411
+ }
412
+
413
+ return 0;
414
+ }
415
+
416
+ /* ── Delete / Update ───────────────────────────────────────────────── */
417
+
418
+ int logosdb_delete(logosdb_t* db, uint64_t id, char** errptr)
419
+ {
420
+ if (!db)
421
+ {
422
+ set_err(errptr, "null db");
423
+ return -1;
424
+ }
425
+
426
+ std::lock_guard<std::mutex> lock(db->mu);
427
+ std::string err;
428
+
429
+ if (id >= db->vectors.n_rows())
430
+ {
431
+ set_err(errptr, "delete: id out of range");
432
+ return -1;
433
+ }
434
+ if (db->meta.is_deleted(id))
435
+ {
436
+ set_err(errptr, "delete: id already deleted");
437
+ return -1;
438
+ }
439
+
440
+ // Mark in the index first. If hnswlib rejects (e.g. label missing because
441
+ // the row was never indexed), bail out before touching the metadata log.
442
+ if (!db->index.mark_deleted(id, err))
443
+ {
444
+ set_err(errptr, err);
445
+ return -1;
446
+ }
447
+
448
+ if (!db->meta.mark_deleted(id, err))
449
+ {
450
+ // Index is now marked but metadata failed. Best-effort rollback so
451
+ // the two stores don't diverge.
452
+ std::string ignore;
453
+ (void)ignore;
454
+ set_err(errptr, err);
455
+ return -1;
456
+ }
457
+ return 0;
458
+ }
459
+
460
+ uint64_t logosdb_update(logosdb_t* db,
461
+ uint64_t id,
462
+ const float* embedding,
463
+ int dim,
464
+ const char* text,
465
+ const char* timestamp,
466
+ char** errptr)
467
+ {
468
+ if (!db || !embedding)
469
+ {
470
+ set_err(errptr, "null db or embedding");
471
+ return UINT64_MAX;
472
+ }
473
+ if (dim != db->dim)
474
+ {
475
+ set_err(errptr, "dimension mismatch");
476
+ return UINT64_MAX;
477
+ }
478
+
479
+ std::lock_guard<std::mutex> lock(db->mu);
480
+ std::string err;
481
+
482
+ if (id >= db->vectors.n_rows())
483
+ {
484
+ set_err(errptr, "update: id out of range");
485
+ return UINT64_MAX;
486
+ }
487
+ if (db->meta.is_deleted(id))
488
+ {
489
+ set_err(errptr, "update: id already deleted");
490
+ return UINT64_MAX;
491
+ }
492
+
493
+ // Mark the old row deleted, then append a fresh row. The two operations
494
+ // share the write mutex, so no reader sees an intermediate state.
495
+ if (!db->index.mark_deleted(id, err))
496
+ {
497
+ set_err(errptr, err);
498
+ return UINT64_MAX;
499
+ }
500
+ if (!db->meta.mark_deleted(id, err))
501
+ {
502
+ set_err(errptr, err);
503
+ return UINT64_MAX;
504
+ }
505
+
506
+ uint64_t vid = db->vectors.append(embedding, dim, err);
507
+ if (vid == UINT64_MAX)
508
+ {
509
+ set_err(errptr, err);
510
+ return UINT64_MAX;
511
+ }
512
+
513
+ uint64_t mid = db->meta.append(text, timestamp, err);
514
+ if (mid == UINT64_MAX)
515
+ {
516
+ set_err(errptr, err);
517
+ return UINT64_MAX;
518
+ }
519
+
520
+ if (!db->index.add(vid, embedding, err))
521
+ {
522
+ set_err(errptr, err);
523
+ return UINT64_MAX;
524
+ }
525
+ return vid;
526
+ }
527
+
528
+ /* ── Search ────────────────────────────────────────────────────────── */
529
+
530
+ logosdb_search_result_t*
531
+ logosdb_search(logosdb_t* db, const float* query, int dim, int top_k, char** errptr)
532
+ {
533
+ if (!db || !query)
534
+ {
535
+ set_err(errptr, "null db or query");
536
+ return nullptr;
537
+ }
538
+ if (dim != db->dim)
539
+ {
540
+ set_err(errptr, "dimension mismatch in search");
541
+ return nullptr;
542
+ }
543
+ if (top_k <= 0)
544
+ {
545
+ set_err(errptr, "top_k must be > 0");
546
+ return nullptr;
547
+ }
548
+
549
+ std::string err;
550
+ auto raw = db->index.search(query, top_k, err);
551
+ if (!err.empty())
552
+ {
553
+ set_err(errptr, err);
554
+ return nullptr;
555
+ }
556
+
557
+ auto* r = new logosdb_search_result_t();
558
+ r->hits.reserve(raw.size());
559
+ for (auto& [label, score] : raw)
560
+ {
561
+ logosdb_search_result_t::Hit h;
562
+ h.id = label;
563
+ h.score = score;
564
+ auto* m = db->meta.row(label);
565
+ if (m)
566
+ {
567
+ h.text = m->text;
568
+ h.timestamp = m->timestamp;
569
+ }
570
+ r->hits.push_back(std::move(h));
571
+ }
572
+ return r;
573
+ }
574
+
575
+ logosdb_search_result_t* logosdb_search_ts_range(logosdb_t* db,
576
+ const float* query,
577
+ int dim,
578
+ int top_k,
579
+ const char* ts_from_iso8601,
580
+ const char* ts_to_iso8601,
581
+ int candidate_k,
582
+ char** errptr)
583
+ {
584
+ if (!db || !query)
585
+ {
586
+ set_err(errptr, "null db or query");
587
+ return nullptr;
588
+ }
589
+ if (dim != db->dim)
590
+ {
591
+ set_err(errptr, "dimension mismatch in search");
592
+ return nullptr;
593
+ }
594
+ if (top_k <= 0)
595
+ {
596
+ set_err(errptr, "top_k must be > 0");
597
+ return nullptr;
598
+ }
599
+ if (candidate_k < top_k)
600
+ {
601
+ candidate_k = top_k; // Ensure we fetch at least top_k candidates
602
+ }
603
+
604
+ std::string err;
605
+ // Fetch more candidates than needed to allow for filtering
606
+ auto raw = db->index.search(query, candidate_k, err);
607
+ if (!err.empty())
608
+ {
609
+ set_err(errptr, err);
610
+ return nullptr;
611
+ }
612
+
613
+ auto* r = new logosdb_search_result_t();
614
+ r->hits.reserve(top_k);
615
+
616
+ for (auto& [label, score] : raw)
617
+ {
618
+ // Check if we've collected enough results
619
+ if ((int)r->hits.size() >= top_k)
620
+ break;
621
+
622
+ // Get metadata for this row
623
+ auto* m = db->meta.row(label);
624
+ if (!m)
625
+ continue;
626
+
627
+ // Apply timestamp filter
628
+ if (ts_from_iso8601 && !m->timestamp.empty())
629
+ {
630
+ if (m->timestamp < ts_from_iso8601)
631
+ continue; // Before start
632
+ }
633
+ if (ts_to_iso8601 && !m->timestamp.empty())
634
+ {
635
+ if (m->timestamp > ts_to_iso8601)
636
+ continue; // After end
637
+ }
638
+
639
+ // This result passes the filter
640
+ logosdb_search_result_t::Hit h;
641
+ h.id = label;
642
+ h.score = score;
643
+ h.text = m->text;
644
+ h.timestamp = m->timestamp;
645
+ r->hits.push_back(std::move(h));
646
+ }
647
+
648
+ return r;
649
+ }
650
+
651
+ int logosdb_result_count(const logosdb_search_result_t* r)
652
+ {
653
+ return r ? (int)r->hits.size() : 0;
654
+ }
655
+
656
+ uint64_t logosdb_result_id(const logosdb_search_result_t* r, int i)
657
+ {
658
+ return (r && i >= 0 && i < (int)r->hits.size()) ? r->hits[i].id : UINT64_MAX;
659
+ }
660
+
661
+ float logosdb_result_score(const logosdb_search_result_t* r, int i)
662
+ {
663
+ return (r && i >= 0 && i < (int)r->hits.size()) ? r->hits[i].score : 0.0f;
664
+ }
665
+
666
+ const char* logosdb_result_text(const logosdb_search_result_t* r, int i)
667
+ {
668
+ if (!r || i < 0 || i >= (int)r->hits.size())
669
+ return nullptr;
670
+ return r->hits[i].text.empty() ? nullptr : r->hits[i].text.c_str();
671
+ }
672
+
673
+ const char* logosdb_result_timestamp(const logosdb_search_result_t* r, int i)
674
+ {
675
+ if (!r || i < 0 || i >= (int)r->hits.size())
676
+ return nullptr;
677
+ return r->hits[i].timestamp.empty() ? nullptr : r->hits[i].timestamp.c_str();
678
+ }
679
+
680
+ void logosdb_result_free(logosdb_search_result_t* r)
681
+ {
682
+ delete r;
683
+ }
684
+
685
+ /* ── Info ──────────────────────────────────────────────────────────── */
686
+
687
+ size_t logosdb_count(logosdb_t* db)
688
+ {
689
+ return db ? db->vectors.n_rows() : 0;
690
+ }
691
+
692
+ size_t logosdb_count_live(logosdb_t* db)
693
+ {
694
+ if (!db)
695
+ return 0;
696
+ size_t total = db->vectors.n_rows();
697
+ size_t deleted = db->meta.deleted_count();
698
+ return total >= deleted ? total - deleted : 0;
699
+ }
700
+
701
+ int logosdb_dim(logosdb_t* db)
702
+ {
703
+ return db ? db->dim : 0;
704
+ }
705
+
706
+ const float* logosdb_raw_vectors(logosdb_t* db, size_t* n_rows, int* dim)
707
+ {
708
+ if (!db)
709
+ {
710
+ if (n_rows)
711
+ *n_rows = 0;
712
+ if (dim)
713
+ *dim = 0;
714
+ return nullptr;
715
+ }
716
+ if (n_rows)
717
+ *n_rows = db->vectors.n_rows();
718
+ if (dim)
719
+ *dim = db->dim;
720
+
721
+ // For reduced precision storage, we cannot return a direct pointer to float32 data
722
+ // because the storage is in a different format. Users should use the C++ API or
723
+ // individual row access for quantized storage.
724
+ StorageDtype dtype = db->vectors.dtype();
725
+ if (dtype != DTYPE_FLOAT32)
726
+ {
727
+ // Return nullptr to indicate this API doesn't work with quantized storage
728
+ if (n_rows)
729
+ *n_rows = 0;
730
+ if (dim)
731
+ *dim = 0;
732
+ return nullptr;
733
+ }
734
+ return static_cast<const float*>(db->vectors.data_raw());
735
+ }
736
+
737
+ /* ── Vector utilities ──────────────────────────────────────────────── */
738
+
739
+ #include <cmath>
740
+
741
/*
 * Scale `vec` in place to unit L2 norm.
 *
 * The norm is accumulated in double precision, and each element is divided by
 * it in double precision before narrowing back to float. This avoids
 * overflowing an intermediate float scale factor when the norm is extremely
 * small.
 *
 * @param vec  Array of `dim` floats, modified in place on success.
 * @param dim  Number of elements; must be > 0.
 * @return 0 on success. -1, leaving `vec` untouched, when `vec` is null,
 *         `dim` <= 0, the norm is zero, or the input contains NaN/Inf.
 */
int logosdb_l2_normalize(float* vec, int dim)
{
    if (!vec || dim <= 0)
        return -1;

    double sq_sum = 0.0;
    for (int i = 0; i < dim; ++i)
    {
        const double x = vec[i];
        sq_sum += x * x;
    }

    const double norm = std::sqrt(sq_sum);
    // A zero norm cannot be normalized. A NaN or infinite norm means the input
    // held a non-finite value, and scaling would only spread NaNs.
    if (!std::isfinite(norm) || !(norm > 0.0))
        return -1;

    for (int i = 0; i < dim; ++i)
    {
        vec[i] = static_cast<float>(static_cast<double>(vec[i]) / norm);
    }
    return 0;
}
763
+
764
+ /* C++ convenience wrappers */
765
+ namespace logosdb
766
+ {
767
+
768
+ bool l2_normalize(std::vector<float>& v)
769
+ {
770
+ if (v.empty())
771
+ return false;
772
+ return logosdb_l2_normalize(v.data(), (int)v.size()) == 0;
773
+ }
774
+
775
+ std::vector<float> l2_normalized(std::vector<float> v)
776
+ {
777
+ l2_normalize(v);
778
+ return v; // NRVO should elide the copy
779
+ }
780
+
781
+ } // namespace logosdb