@equationalapplications/core-llm-wiki 2.6.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +171 -1
- package/dist/index.d.mts +77 -5
- package/dist/index.d.ts +77 -5
- package/dist/index.js +696 -219
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +696 -219
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -24,7 +24,8 @@ async function setupDatabase(db, prefix) {
|
|
|
24
24
|
last_accessed_at INTEGER,
|
|
25
25
|
access_count INTEGER NOT NULL DEFAULT 0,
|
|
26
26
|
deleted_at INTEGER,
|
|
27
|
-
embedding TEXT
|
|
27
|
+
embedding TEXT,
|
|
28
|
+
embedding_blob BLOB
|
|
28
29
|
);
|
|
29
30
|
|
|
30
31
|
CREATE INDEX IF NOT EXISTS ${prefix}entries_entity_idx ON ${prefix}entries(entity_id);
|
|
@@ -97,6 +98,20 @@ var MIGRATIONS = [
|
|
|
97
98
|
await db.execAsync(`ALTER TABLE ${prefix}entries ADD COLUMN embedding TEXT`);
|
|
98
99
|
}
|
|
99
100
|
}
|
|
101
|
+
},
|
|
102
|
+
{
|
|
103
|
+
version: 3,
|
|
104
|
+
description: "Add embedding_blob BLOB column for Float32Array vector storage",
|
|
105
|
+
run: async (db, prefix) => {
|
|
106
|
+
const cols = await db.getAllAsync(
|
|
107
|
+
`PRAGMA table_info(${prefix}entries)`
|
|
108
|
+
);
|
|
109
|
+
if (!cols.some((c) => c.name === "embedding_blob")) {
|
|
110
|
+
await db.execAsync(
|
|
111
|
+
`ALTER TABLE ${prefix}entries ADD COLUMN embedding_blob BLOB`
|
|
112
|
+
);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
100
115
|
}
|
|
101
116
|
];
|
|
102
117
|
for (let i = 1; i < MIGRATIONS.length; i++) {
|
|
@@ -154,6 +169,34 @@ function cosineSimilarity(a, b) {
|
|
|
154
169
|
return denom === 0 ? 0 : dot / denom;
|
|
155
170
|
}
|
|
156
171
|
|
|
172
|
+
// src/utils/embedding.ts
|
|
173
|
+
/**
 * Decode a stored embedding into a fresh Float32Array.
 *
 * A non-empty binary `blob` takes precedence over `text`. The blob is read as
 * packed float32 values (4 bytes per component); `text` is the legacy form, a
 * JSON array of numbers.
 *
 * @param {ArrayBuffer|ArrayBufferView|null|undefined} blob Raw float32 bytes.
 *   Any typed array, DataView or ArrayBuffer is accepted and read byte-wise.
 * @param {string|null|undefined} text JSON-encoded number array, used only
 *   when `blob` is missing or empty.
 * @returns {Float32Array|null} The decoded vector, or null when no usable
 *   embedding is present: blob length not a multiple of 4, any non-finite
 *   component, unparsable JSON, or a JSON value that is not an array of finite
 *   numbers representable as float32.
 */
function parseEmbedding(blob, text) {
  const allFinite = (vector) => vector.every((value) => Number.isFinite(value));

  if (blob && blob.byteLength > 0) {
    // A partial float means the stored bytes are corrupt; do not fall back to text.
    if (blob.byteLength % 4 !== 0) return null;

    // Normalise to a byte view. Passing an ArrayBuffer or a wider typed array
    // straight to Uint8Array#set would copy nothing (no `length`) or copy
    // element values instead of bytes.
    let bytes = blob;
    if (ArrayBuffer.isView(blob)) {
      bytes = new Uint8Array(blob.buffer, blob.byteOffset, blob.byteLength);
    } else if (blob instanceof ArrayBuffer) {
      bytes = new Uint8Array(blob);
    }

    // Copy into a new buffer: the result must not alias the caller's memory,
    // and a Float32Array needs a 4-byte-aligned offset the source view may lack.
    const copy = new ArrayBuffer(blob.byteLength);
    new Uint8Array(copy).set(bytes);
    const vector = new Float32Array(copy);
    return allFinite(vector) ? vector : null;
  }

  if (text) {
    try {
      const parsed = JSON.parse(text);
      const isNumberArray =
        Array.isArray(parsed) &&
        parsed.every((v) => typeof v === "number" && Number.isFinite(v));
      if (!isNumberArray) return null;
      const vector = new Float32Array(parsed);
      // Finite doubles can still overflow to ±Infinity once narrowed to float32.
      return allFinite(vector) ? vector : null;
    } catch {
      // Malformed JSON is treated the same as "no embedding stored".
      return null;
    }
  }

  return null;
}
|
|
199
|
+
|
|
157
200
|
// src/WikiMemory.ts
|
|
158
201
|
function parseJsonResponse(text) {
|
|
159
202
|
const firstBrace = text.indexOf("{");
|
|
@@ -358,7 +401,7 @@ function jaccardScore(a, b) {
|
|
|
358
401
|
}
|
|
359
402
|
var FUZZY_THRESHOLD = 0.5;
|
|
360
403
|
var MIN_TOKENS_TO_QUALIFY = 3;
|
|
361
|
-
var
|
|
404
|
+
var _WikiMemory = class _WikiMemory {
|
|
362
405
|
constructor(db, options) {
|
|
363
406
|
this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
|
|
364
407
|
this.activeIngestJobs = /* @__PURE__ */ new Set();
|
|
@@ -372,6 +415,7 @@ var WikiMemory = class {
|
|
|
372
415
|
}
|
|
373
416
|
});
|
|
374
417
|
this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
|
|
418
|
+
this.vectorCache = /* @__PURE__ */ new Map();
|
|
375
419
|
this.db = db;
|
|
376
420
|
this.options = options;
|
|
377
421
|
this.prefix = options.config?.tablePrefix || "llm_wiki_";
|
|
@@ -438,10 +482,6 @@ var WikiMemory = class {
|
|
|
438
482
|
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
439
483
|
[String(dim)]
|
|
440
484
|
);
|
|
441
|
-
} else {
|
|
442
|
-
await this.db.runAsync(
|
|
443
|
-
`DELETE FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
444
|
-
);
|
|
445
485
|
}
|
|
446
486
|
} else {
|
|
447
487
|
await this.db.runAsync(
|
|
@@ -460,7 +500,18 @@ var WikiMemory = class {
|
|
|
460
500
|
const mismatch = await this.db.getFirstAsync(
|
|
461
501
|
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
462
502
|
);
|
|
463
|
-
if (mismatch)
|
|
503
|
+
if (!mismatch) return;
|
|
504
|
+
const newDim = parseInt(mismatch.value, 10);
|
|
505
|
+
const residual = await this.db.getFirstAsync(
|
|
506
|
+
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
507
|
+
WHERE deleted_at IS NULL
|
|
508
|
+
AND (
|
|
509
|
+
(embedding_blob IS NOT NULL AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?)
|
|
510
|
+
OR (embedding_blob IS NULL AND embedding IS NOT NULL)
|
|
511
|
+
)`,
|
|
512
|
+
[newDim]
|
|
513
|
+
);
|
|
514
|
+
if (!residual || residual.cnt === 0) {
|
|
464
515
|
await this.db.runAsync(
|
|
465
516
|
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension', ?)`,
|
|
466
517
|
[mismatch.value]
|
|
@@ -491,10 +542,23 @@ var WikiMemory = class {
|
|
|
491
542
|
console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
|
|
492
543
|
return false;
|
|
493
544
|
}
|
|
494
|
-
|
|
545
|
+
const float32Vector = new Float32Array(vector);
|
|
546
|
+
let hasNonFinite = false;
|
|
547
|
+
for (let i = 0; i < float32Vector.length; i++) {
|
|
548
|
+
if (!isFinite(float32Vector[i])) {
|
|
549
|
+
hasNonFinite = true;
|
|
550
|
+
break;
|
|
551
|
+
}
|
|
552
|
+
}
|
|
553
|
+
if (hasNonFinite) {
|
|
554
|
+
console.warn(`[WikiMemory] embedFact: embed() returned values that overflow float32 for ${fact.id}; skipping.`);
|
|
555
|
+
return false;
|
|
556
|
+
}
|
|
557
|
+
await this.storeEmbeddingDimension(float32Vector.length);
|
|
558
|
+
const blob = new Uint8Array(float32Vector.buffer);
|
|
495
559
|
await this.db.runAsync(
|
|
496
|
-
`UPDATE ${this.prefix}entries SET embedding =
|
|
497
|
-
[
|
|
560
|
+
`UPDATE ${this.prefix}entries SET embedding_blob = ?, embedding = NULL WHERE id = ?`,
|
|
561
|
+
[blob, fact.id]
|
|
498
562
|
);
|
|
499
563
|
return true;
|
|
500
564
|
} catch (err) {
|
|
@@ -614,9 +678,24 @@ var WikiMemory = class {
|
|
|
614
678
|
_globalReembedKey() {
|
|
615
679
|
return `${this.prefix}:reembed`;
|
|
616
680
|
}
|
|
681
|
+
_importKey(entityId) {
|
|
682
|
+
return `${this.prefix}:${entityId}:import`;
|
|
683
|
+
}
|
|
684
|
+
_globalImportKey() {
|
|
685
|
+
return `${this.prefix}:import`;
|
|
686
|
+
}
|
|
687
|
+
_forgetKey(entityId) {
|
|
688
|
+
return `${this.prefix}:${entityId}:forget`;
|
|
689
|
+
}
|
|
617
690
|
_isReembedActive(entityId) {
|
|
618
691
|
return this.activeMaintenanceJobs.has(this._reembedKey(entityId)) || this.activeMaintenanceJobs.has(this._globalReembedKey());
|
|
619
692
|
}
|
|
693
|
+
_isImportActiveFor(entityId) {
|
|
694
|
+
return this.activeMaintenanceJobs.has(this._importKey(entityId)) || this.activeMaintenanceJobs.has(this._globalImportKey());
|
|
695
|
+
}
|
|
696
|
+
_isForgetActiveFor(entityId) {
|
|
697
|
+
return this.activeMaintenanceJobs.has(this._forgetKey(entityId));
|
|
698
|
+
}
|
|
620
699
|
/** Returns true if any maintenance job has the given operation suffix (e.g. ':prune'). */
|
|
621
700
|
_isAnyMaintenanceActiveWithSuffix(suffix) {
|
|
622
701
|
const entityKeyPrefix = `${this.prefix}:`;
|
|
@@ -659,6 +738,10 @@ var WikiMemory = class {
|
|
|
659
738
|
blockingOperation = "reembed";
|
|
660
739
|
} else if (isIngestRunning) {
|
|
661
740
|
blockingOperation = "ingest";
|
|
741
|
+
} else if (this._isImportActiveFor(entityId)) {
|
|
742
|
+
blockingOperation = "import";
|
|
743
|
+
} else if (this._isForgetActiveFor(entityId)) {
|
|
744
|
+
blockingOperation = "forget";
|
|
662
745
|
}
|
|
663
746
|
if (blockingOperation !== null) {
|
|
664
747
|
throw new WikiBusyError(blockingOperation, entityId);
|
|
@@ -703,19 +786,27 @@ var WikiMemory = class {
|
|
|
703
786
|
await this.db.execAsync(`VACUUM`);
|
|
704
787
|
}
|
|
705
788
|
await this.rebuildMiniSearchIndex(entityId);
|
|
789
|
+
this.vectorCache.delete(entityId);
|
|
706
790
|
return { entries: deletedEntries, tasks: deletedTasks, events: deletedEvents };
|
|
707
791
|
} finally {
|
|
708
792
|
this.activeMaintenanceJobs.delete(pruneKey);
|
|
709
793
|
}
|
|
710
794
|
}
|
|
711
|
-
async read(entityId, query) {
|
|
712
|
-
const
|
|
795
|
+
async read(entityId, query, options) {
|
|
796
|
+
const config = this.options.config;
|
|
797
|
+
const rawMaxResults = options?.maxResults ?? config?.maxResults ?? config?.maxFtsResults ?? 10;
|
|
798
|
+
const maxResults = Number.isFinite(rawMaxResults) ? Math.max(0, Math.trunc(rawMaxResults)) : 10;
|
|
799
|
+
const rawPreFilterLimit = options?.preFilterLimit === null ? void 0 : options?.preFilterLimit ?? config?.preFilterLimit;
|
|
800
|
+
const effectivePreFilterLimit = rawPreFilterLimit === void 0 ? void 0 : Number.isFinite(rawPreFilterLimit) ? Math.max(0, Math.trunc(rawPreFilterLimit)) : void 0;
|
|
801
|
+
const hybridWeight = options?.hybridWeight ?? config?.hybridWeight;
|
|
802
|
+
const weight = hybridWeight !== void 0 && !Number.isNaN(hybridWeight) ? Math.max(0, Math.min(1, hybridWeight)) : void 0;
|
|
803
|
+
const skipEmbed = weight === 0;
|
|
713
804
|
const embedFn = this.options.llmProvider.embed;
|
|
714
805
|
const trimmedQuery = query.trim();
|
|
715
806
|
let facts = [];
|
|
716
|
-
if (trimmedQuery) {
|
|
807
|
+
if (maxResults === 0) ; else if (trimmedQuery) {
|
|
717
808
|
let usedEmbed = false;
|
|
718
|
-
if (embedFn) {
|
|
809
|
+
if (!skipEmbed && embedFn) {
|
|
719
810
|
try {
|
|
720
811
|
const queryVec = await embedFn(trimmedQuery);
|
|
721
812
|
if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
|
|
@@ -734,49 +825,138 @@ var WikiMemory = class {
|
|
|
734
825
|
);
|
|
735
826
|
}
|
|
736
827
|
}
|
|
737
|
-
const
|
|
738
|
-
`SELECT
|
|
739
|
-
|
|
828
|
+
const mismatchedCount = await this.db.getFirstAsync(
|
|
829
|
+
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
830
|
+
WHERE entity_id = ? AND deleted_at IS NULL
|
|
831
|
+
AND embedding_blob IS NOT NULL
|
|
832
|
+
AND (CAST(length(embedding_blob) AS INTEGER) % 4 = 0)
|
|
833
|
+
AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?`,
|
|
834
|
+
[entityId, queryVec.length]
|
|
740
835
|
);
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
836
|
+
if (mismatchedCount && mismatchedCount.cnt > 0) {
|
|
837
|
+
throw new Error(
|
|
838
|
+
`Some facts have embeddings that do not match the current model dimension. Call runReembed() to rebuild all embeddings consistently.`
|
|
839
|
+
);
|
|
840
|
+
}
|
|
841
|
+
let candidateRows;
|
|
842
|
+
let populateCache = true;
|
|
843
|
+
let miniSearchScores;
|
|
844
|
+
if (effectivePreFilterLimit !== void 0) {
|
|
845
|
+
populateCache = false;
|
|
846
|
+
const preResults = this.miniSearch.search(trimmedQuery, {
|
|
847
|
+
filter: (r) => r.entity_id === entityId,
|
|
848
|
+
combineWith: "OR"
|
|
849
|
+
});
|
|
850
|
+
if (preResults.length === 0) {
|
|
851
|
+
candidateRows = null;
|
|
852
|
+
} else {
|
|
853
|
+
const topKResults = preResults.slice(0, effectivePreFilterLimit);
|
|
854
|
+
if (topKResults.length === 0) {
|
|
855
|
+
candidateRows = null;
|
|
856
|
+
} else {
|
|
857
|
+
const topKIds = topKResults.map((r) => r.id);
|
|
858
|
+
const inClauseChunkSize = 500;
|
|
859
|
+
candidateRows = [];
|
|
860
|
+
for (let i = 0; i < topKIds.length; i += inClauseChunkSize) {
|
|
861
|
+
const idChunk = topKIds.slice(i, i + inClauseChunkSize);
|
|
862
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
863
|
+
const chunkRows = await this.db.getAllAsync(
|
|
864
|
+
`SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
865
|
+
idChunk
|
|
866
|
+
);
|
|
867
|
+
candidateRows.push(...chunkRows);
|
|
868
|
+
}
|
|
869
|
+
if (weight !== void 0 && weight < 1) {
|
|
870
|
+
const maxMsScore = Math.max(1, topKResults[0]?.score ?? 1);
|
|
871
|
+
miniSearchScores = new Map(topKResults.map((r) => [r.id, r.score / maxMsScore]));
|
|
748
872
|
}
|
|
749
|
-
} catch {
|
|
750
873
|
}
|
|
751
874
|
}
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
875
|
+
} else {
|
|
876
|
+
candidateRows = await this.db.getAllAsync(
|
|
877
|
+
`SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
878
|
+
[entityId]
|
|
879
|
+
);
|
|
880
|
+
if (weight !== void 0 && weight < 1) {
|
|
881
|
+
const msResults = this.miniSearch.search(trimmedQuery, {
|
|
882
|
+
filter: (r) => r.entity_id === entityId,
|
|
883
|
+
combineWith: "OR"
|
|
884
|
+
});
|
|
885
|
+
const maxMsScore = Math.max(1, msResults[0]?.score ?? 1);
|
|
886
|
+
miniSearchScores = new Map(msResults.map((r) => [r.id, r.score / maxMsScore]));
|
|
758
887
|
}
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
888
|
+
}
|
|
889
|
+
if (candidateRows === null) {
|
|
890
|
+
usedEmbed = true;
|
|
891
|
+
} else {
|
|
892
|
+
let entityCache = this.vectorCache.get(entityId);
|
|
893
|
+
const tooLarge = populateCache && candidateRows.length > _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
|
|
894
|
+
if (tooLarge && entityCache) {
|
|
895
|
+
this.vectorCache.delete(entityId);
|
|
896
|
+
entityCache = void 0;
|
|
762
897
|
}
|
|
763
|
-
const
|
|
764
|
-
if (
|
|
765
|
-
|
|
898
|
+
const canCache = populateCache && !tooLarge;
|
|
899
|
+
if (canCache && !entityCache) {
|
|
900
|
+
entityCache = /* @__PURE__ */ new Map();
|
|
766
901
|
}
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
902
|
+
const scored = candidateRows.map((row) => {
|
|
903
|
+
let vector = entityCache?.get(row.id) ?? parseEmbedding(row.embedding_blob, row.embedding);
|
|
904
|
+
if (vector && canCache && entityCache && !entityCache.has(row.id)) {
|
|
905
|
+
entityCache.set(row.id, vector);
|
|
906
|
+
}
|
|
907
|
+
let score = 0;
|
|
908
|
+
if (vector && vector.length === queryVec.length) {
|
|
909
|
+
const cosSim = cosineSimilarity(queryVec, vector);
|
|
910
|
+
if (weight !== void 0) {
|
|
911
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
912
|
+
score = weight * Math.max(0, cosSim) + (1 - weight) * kwScore;
|
|
913
|
+
} else {
|
|
914
|
+
score = cosSim;
|
|
915
|
+
}
|
|
916
|
+
} else if (weight !== void 0 && weight < 1) {
|
|
917
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
918
|
+
score = (1 - weight) * kwScore;
|
|
919
|
+
} else {
|
|
920
|
+
score = -2;
|
|
921
|
+
}
|
|
922
|
+
return { row, score };
|
|
923
|
+
});
|
|
924
|
+
if (canCache && entityCache && entityCache.size > 0) {
|
|
925
|
+
if (!this.vectorCache.has(entityId)) {
|
|
926
|
+
if (this.vectorCache.size >= _WikiMemory.MAX_VECTOR_CACHE_ENTITIES) {
|
|
927
|
+
const oldestKey = this.vectorCache.keys().next().value;
|
|
928
|
+
if (oldestKey !== void 0) this.vectorCache.delete(oldestKey);
|
|
929
|
+
}
|
|
930
|
+
this.vectorCache.set(entityId, entityCache);
|
|
931
|
+
}
|
|
932
|
+
}
|
|
933
|
+
scored.sort((a, b) => {
|
|
934
|
+
const scoreDiff = b.score - a.score;
|
|
935
|
+
if (scoreDiff !== 0) return scoreDiff;
|
|
936
|
+
const accessCountDiff = (b.row.access_count ?? 0) - (a.row.access_count ?? 0);
|
|
937
|
+
if (accessCountDiff !== 0) return accessCountDiff;
|
|
938
|
+
const updatedAtDiff = (b.row.updated_at ?? 0) - (a.row.updated_at ?? 0);
|
|
939
|
+
if (updatedAtDiff !== 0) return updatedAtDiff;
|
|
940
|
+
return a.row.id.localeCompare(b.row.id);
|
|
941
|
+
});
|
|
942
|
+
const topIds = scored.slice(0, maxResults).map((s) => s.row.id);
|
|
943
|
+
if (topIds.length > 0) {
|
|
944
|
+
const fullRows = [];
|
|
945
|
+
const phase2ChunkSize = 500;
|
|
946
|
+
for (let i = 0; i < topIds.length; i += phase2ChunkSize) {
|
|
947
|
+
const idChunk = topIds.slice(i, i + phase2ChunkSize);
|
|
948
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
949
|
+
const chunkRows = await this.db.getAllAsync(
|
|
950
|
+
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
951
|
+
idChunk
|
|
952
|
+
);
|
|
953
|
+
fullRows.push(...chunkRows);
|
|
954
|
+
}
|
|
955
|
+
const byId = new Map(fullRows.map((r) => [r.id, r]));
|
|
956
|
+
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
957
|
+
}
|
|
958
|
+
usedEmbed = true;
|
|
778
959
|
}
|
|
779
|
-
usedEmbed = true;
|
|
780
960
|
} catch (err) {
|
|
781
961
|
const error = err instanceof Error ? err : new Error(String(err));
|
|
782
962
|
this.options.onRetrievalFallback?.(error);
|
|
@@ -789,25 +969,35 @@ var WikiMemory = class {
|
|
|
789
969
|
});
|
|
790
970
|
const topIds = results.slice(0, maxResults).map((r) => r.id);
|
|
791
971
|
if (topIds.length > 0) {
|
|
792
|
-
const
|
|
793
|
-
const
|
|
794
|
-
|
|
795
|
-
topIds
|
|
796
|
-
|
|
797
|
-
|
|
972
|
+
const kwRows = [];
|
|
973
|
+
const kwChunkSize = 500;
|
|
974
|
+
for (let i = 0; i < topIds.length; i += kwChunkSize) {
|
|
975
|
+
const idChunk = topIds.slice(i, i + kwChunkSize);
|
|
976
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
977
|
+
const chunkRows = await this.db.getAllAsync(
|
|
978
|
+
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
979
|
+
idChunk
|
|
980
|
+
);
|
|
981
|
+
kwRows.push(...chunkRows);
|
|
982
|
+
}
|
|
983
|
+
const byId = new Map(kwRows.map((r) => [r.id, r]));
|
|
798
984
|
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
799
985
|
}
|
|
800
986
|
}
|
|
801
987
|
if (facts.length > 0) {
|
|
802
988
|
const ids = facts.map((f) => f.id);
|
|
803
|
-
const placeholders = ids.map(() => "?").join(",");
|
|
804
989
|
const now = Date.now();
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
990
|
+
const accessChunkSize = 500;
|
|
991
|
+
for (let i = 0; i < ids.length; i += accessChunkSize) {
|
|
992
|
+
const idChunk = ids.slice(i, i + accessChunkSize);
|
|
993
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
994
|
+
await this.db.runAsync(
|
|
995
|
+
`UPDATE ${this.prefix}entries
|
|
996
|
+
SET access_count = access_count + 1, last_accessed_at = ?
|
|
997
|
+
WHERE id IN (${placeholders})`,
|
|
998
|
+
[now, ...idChunk]
|
|
999
|
+
);
|
|
1000
|
+
}
|
|
811
1001
|
}
|
|
812
1002
|
} else {
|
|
813
1003
|
facts = await this.db.getAllAsync(
|
|
@@ -834,7 +1024,7 @@ var WikiMemory = class {
|
|
|
834
1024
|
)
|
|
835
1025
|
]);
|
|
836
1026
|
const parsedFacts = facts.map((f) => {
|
|
837
|
-
const { embedding: _embedding, ...rest } = f;
|
|
1027
|
+
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
838
1028
|
return {
|
|
839
1029
|
...rest,
|
|
840
1030
|
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
@@ -866,7 +1056,7 @@ var WikiMemory = class {
|
|
|
866
1056
|
if (memoryCheckpoint > count) memoryCheckpoint = 0;
|
|
867
1057
|
if (count - memoryCheckpoint >= threshold) {
|
|
868
1058
|
const jobKey = this._librarianKey(entityId);
|
|
869
|
-
if (!this.activeMaintenanceJobs.has(jobKey) && !this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
1059
|
+
if (!this.activeMaintenanceJobs.has(jobKey) && !this.activeMaintenanceJobs.has(this._pruneKey(entityId)) && !this._isReembedActive(entityId) && !this._isImportActiveFor(entityId) && !this._isForgetActiveFor(entityId)) {
|
|
870
1060
|
this.activeMaintenanceJobs.add(jobKey);
|
|
871
1061
|
this.runLibrarianThenMaybeHeal(entityId, count).catch(console.error).finally(() => this.activeMaintenanceJobs.delete(jobKey));
|
|
872
1062
|
}
|
|
@@ -914,7 +1104,7 @@ var WikiMemory = class {
|
|
|
914
1104
|
LIMIT 100
|
|
915
1105
|
`, [entityId]);
|
|
916
1106
|
const currentFacts = currentFactsRows.map((f) => {
|
|
917
|
-
const { embedding: _embedding, ...rest } = f;
|
|
1107
|
+
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
918
1108
|
return {
|
|
919
1109
|
...rest,
|
|
920
1110
|
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
@@ -968,10 +1158,12 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
968
1158
|
`, [id, entityId, task.description, "pending", task.priority, now, now]);
|
|
969
1159
|
}
|
|
970
1160
|
});
|
|
1161
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1162
|
+
this.vectorCache.delete(entityId);
|
|
971
1163
|
for (const fact of insertedFacts) {
|
|
972
1164
|
await this.embedFact(fact);
|
|
973
1165
|
}
|
|
974
|
-
|
|
1166
|
+
this.vectorCache.delete(entityId);
|
|
975
1167
|
}
|
|
976
1168
|
async _doRunHeal(entityId) {
|
|
977
1169
|
const now = Date.now();
|
|
@@ -1009,7 +1201,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
1009
1201
|
const documentAnchors = allFactsRows.filter((f) => f.source_type === "user_document").map(({ id, title, source_ref }) => ({ id, title, source_ref }));
|
|
1010
1202
|
const userPrompt = `Heal Candidates:
|
|
1011
1203
|
${JSON.stringify(healCandidates.map((f) => {
|
|
1012
|
-
const { embedding: _embedding, ...rest } = f;
|
|
1204
|
+
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
1013
1205
|
return { ...rest, tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags };
|
|
1014
1206
|
}), null, 2)}
|
|
1015
1207
|
|
|
@@ -1052,10 +1244,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1052
1244
|
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1053
1245
|
}
|
|
1054
1246
|
});
|
|
1247
|
+
this.vectorCache.delete(entityId);
|
|
1248
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1055
1249
|
for (const fact of insertedFacts) {
|
|
1056
1250
|
await this.embedFact(fact);
|
|
1057
1251
|
}
|
|
1058
|
-
|
|
1252
|
+
this.vectorCache.delete(entityId);
|
|
1059
1253
|
}
|
|
1060
1254
|
async runLibrarian(entityId) {
|
|
1061
1255
|
const jobKey = this._librarianKey(entityId);
|
|
@@ -1068,6 +1262,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1068
1262
|
if (this._isReembedActive(entityId)) {
|
|
1069
1263
|
throw new WikiBusyError("reembed", entityId);
|
|
1070
1264
|
}
|
|
1265
|
+
if (this._isImportActiveFor(entityId)) {
|
|
1266
|
+
throw new WikiBusyError("import", entityId);
|
|
1267
|
+
}
|
|
1268
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1269
|
+
throw new WikiBusyError("forget", entityId);
|
|
1270
|
+
}
|
|
1071
1271
|
this.activeMaintenanceJobs.add(jobKey);
|
|
1072
1272
|
try {
|
|
1073
1273
|
await this._doRunLibrarian(entityId);
|
|
@@ -1086,6 +1286,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1086
1286
|
if (this._isReembedActive(entityId)) {
|
|
1087
1287
|
throw new WikiBusyError("reembed", entityId);
|
|
1088
1288
|
}
|
|
1289
|
+
if (this._isImportActiveFor(entityId)) {
|
|
1290
|
+
throw new WikiBusyError("import", entityId);
|
|
1291
|
+
}
|
|
1292
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1293
|
+
throw new WikiBusyError("forget", entityId);
|
|
1294
|
+
}
|
|
1089
1295
|
this.activeMaintenanceJobs.add(jobKey);
|
|
1090
1296
|
try {
|
|
1091
1297
|
await this._doRunHeal(entityId);
|
|
@@ -1093,9 +1299,9 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1093
1299
|
this.activeMaintenanceJobs.delete(jobKey);
|
|
1094
1300
|
}
|
|
1095
1301
|
}
|
|
1096
|
-
async runReembed(entityId) {
|
|
1302
|
+
async runReembed(entityId, opts) {
|
|
1097
1303
|
const embedFn = this.options.llmProvider.embed;
|
|
1098
|
-
if (!embedFn) return { embedded: 0, skipped: 0 };
|
|
1304
|
+
if (!embedFn) return { embedded: 0, skipped: 0, failed: 0 };
|
|
1099
1305
|
const reembedKey = entityId ? this._reembedKey(entityId) : this._globalReembedKey();
|
|
1100
1306
|
if (this.activeMaintenanceJobs.has(reembedKey)) {
|
|
1101
1307
|
throw new WikiBusyError("reembed", entityId ?? "*");
|
|
@@ -1116,6 +1322,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1116
1322
|
if (this._isIngestActiveFor(entityId)) {
|
|
1117
1323
|
throw new WikiBusyError("ingest", entityId);
|
|
1118
1324
|
}
|
|
1325
|
+
if (this._isImportActiveFor(entityId)) {
|
|
1326
|
+
throw new WikiBusyError("import", entityId);
|
|
1327
|
+
}
|
|
1328
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1329
|
+
throw new WikiBusyError("forget", entityId);
|
|
1330
|
+
}
|
|
1119
1331
|
} else {
|
|
1120
1332
|
if (this._isAnyMaintenanceActiveWithSuffix(":reembed")) {
|
|
1121
1333
|
throw new WikiBusyError("reembed", "*");
|
|
@@ -1132,6 +1344,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1132
1344
|
if (this.activeIngestJobs.size > 0) {
|
|
1133
1345
|
throw new WikiBusyError("ingest", "*");
|
|
1134
1346
|
}
|
|
1347
|
+
if (this._isAnyMaintenanceActiveWithSuffix(":import")) {
|
|
1348
|
+
throw new WikiBusyError("import", "*");
|
|
1349
|
+
}
|
|
1350
|
+
if (this._isAnyMaintenanceActiveWithSuffix(":forget")) {
|
|
1351
|
+
throw new WikiBusyError("forget", "*");
|
|
1352
|
+
}
|
|
1135
1353
|
}
|
|
1136
1354
|
this.activeMaintenanceJobs.add(reembedKey);
|
|
1137
1355
|
try {
|
|
@@ -1141,17 +1359,64 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1141
1359
|
`SELECT * FROM ${this.prefix}entries WHERE ${where}`,
|
|
1142
1360
|
params
|
|
1143
1361
|
);
|
|
1362
|
+
if (entityId) {
|
|
1363
|
+
this.vectorCache.delete(entityId);
|
|
1364
|
+
} else {
|
|
1365
|
+
this.vectorCache.clear();
|
|
1366
|
+
}
|
|
1367
|
+
const skipExisting = opts?.skipExisting ?? false;
|
|
1368
|
+
let effectiveSkip = skipExisting;
|
|
1369
|
+
if (skipExisting) {
|
|
1370
|
+
const mismatchRow = await this.db.getFirstAsync(
|
|
1371
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
1372
|
+
);
|
|
1373
|
+
if (mismatchRow) {
|
|
1374
|
+
if (entityId) {
|
|
1375
|
+
const mismatchDim = parseInt(mismatchRow.value, 10);
|
|
1376
|
+
const staleForEntity = await this.db.getFirstAsync(
|
|
1377
|
+
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
1378
|
+
WHERE entity_id = ? AND deleted_at IS NULL
|
|
1379
|
+
AND (
|
|
1380
|
+
embedding_blob IS NULL
|
|
1381
|
+
OR (CAST(length(embedding_blob) AS INTEGER) / 4) != ?
|
|
1382
|
+
)`,
|
|
1383
|
+
[entityId, mismatchDim]
|
|
1384
|
+
);
|
|
1385
|
+
if (staleForEntity && staleForEntity.cnt > 0) effectiveSkip = false;
|
|
1386
|
+
} else {
|
|
1387
|
+
effectiveSkip = false;
|
|
1388
|
+
}
|
|
1389
|
+
}
|
|
1390
|
+
}
|
|
1144
1391
|
let embedded = 0;
|
|
1145
1392
|
let skipped = 0;
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1393
|
+
let failed = 0;
|
|
1394
|
+
try {
|
|
1395
|
+
for (const row of rows) {
|
|
1396
|
+
const existingBlob = row.embedding_blob;
|
|
1397
|
+
const blobIsValid = !!existingBlob && existingBlob.byteLength > 0 && existingBlob.byteLength % 4 === 0;
|
|
1398
|
+
if (effectiveSkip && blobIsValid) {
|
|
1399
|
+
const vec = parseEmbedding(existingBlob, null);
|
|
1400
|
+
if (vec !== null && vec.every((v) => Number.isFinite(v))) {
|
|
1401
|
+
skipped++;
|
|
1402
|
+
continue;
|
|
1403
|
+
}
|
|
1404
|
+
}
|
|
1405
|
+
const success = await this.embedFact(row);
|
|
1406
|
+
if (success) embedded++;
|
|
1407
|
+
else failed++;
|
|
1408
|
+
}
|
|
1409
|
+
if (embedded > 0) {
|
|
1410
|
+
await this._reconcileEmbeddingDimension();
|
|
1411
|
+
}
|
|
1412
|
+
} finally {
|
|
1413
|
+
if (entityId) {
|
|
1414
|
+
this.vectorCache.delete(entityId);
|
|
1415
|
+
} else {
|
|
1416
|
+
this.vectorCache.clear();
|
|
1417
|
+
}
|
|
1153
1418
|
}
|
|
1154
|
-
return { embedded, skipped };
|
|
1419
|
+
return { embedded, skipped, failed };
|
|
1155
1420
|
} finally {
|
|
1156
1421
|
this.activeMaintenanceJobs.delete(reembedKey);
|
|
1157
1422
|
}
|
|
@@ -1171,6 +1436,9 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1171
1436
|
heal: this.activeMaintenanceJobs.has(this._healKey(entityId))
|
|
1172
1437
|
};
|
|
1173
1438
|
}
|
|
1439
|
+
clearVectorCache() {
|
|
1440
|
+
this.vectorCache.clear();
|
|
1441
|
+
}
|
|
1174
1442
|
async _getFullBundle(entityId, opts) {
|
|
1175
1443
|
const maxEvents = opts?.maxEvents;
|
|
1176
1444
|
const eventsQuery = maxEvents != null ? `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at ASC`;
|
|
@@ -1187,10 +1455,16 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1187
1455
|
this.db.getAllAsync(eventsQuery, eventsParams)
|
|
1188
1456
|
]);
|
|
1189
1457
|
const facts = factsRaw.map((f) => {
|
|
1190
|
-
const { embedding: _embedding, ...rest } = f;
|
|
1458
|
+
const { embedding: _embedding, embedding_blob, ...rest } = f;
|
|
1459
|
+
const safeBlobCopy = opts?.includeBlobs && embedding_blob ? (() => {
|
|
1460
|
+
const c = new ArrayBuffer(embedding_blob.byteLength);
|
|
1461
|
+
new Uint8Array(c).set(embedding_blob);
|
|
1462
|
+
return new Uint8Array(c);
|
|
1463
|
+
})() : void 0;
|
|
1464
|
+
const factBase = safeBlobCopy ? { ...rest, embedding_blob: safeBlobCopy } : rest;
|
|
1191
1465
|
return {
|
|
1192
|
-
...
|
|
1193
|
-
tags: typeof
|
|
1466
|
+
...factBase,
|
|
1467
|
+
tags: typeof factBase.tags === "string" ? JSON.parse(factBase.tags) : factBase.tags
|
|
1194
1468
|
};
|
|
1195
1469
|
});
|
|
1196
1470
|
const events = maxEvents != null ? eventsRaw.slice().reverse() : eventsRaw;
|
|
@@ -1217,7 +1491,7 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1217
1491
|
for (let i = 0; i < ids.length; i += BATCH) {
|
|
1218
1492
|
const batch = ids.slice(i, i + BATCH);
|
|
1219
1493
|
const batchResults = await Promise.all(
|
|
1220
|
-
batch.map(async (id) => [id, await this._getFullBundle(id)])
|
|
1494
|
+
batch.map(async (id) => [id, await this._getFullBundle(id, { includeBlobs: true })])
|
|
1221
1495
|
);
|
|
1222
1496
|
for (const [id, bundle] of batchResults) {
|
|
1223
1497
|
entities[id] = bundle;
|
|
@@ -1227,172 +1501,339 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1227
1501
|
}
|
|
1228
1502
|
async importDump(dump, opts) {
|
|
1229
1503
|
const merge = opts?.merge ?? false;
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1504
|
+
const entityIds = Object.keys(dump.entities);
|
|
1505
|
+
for (const entityId of entityIds) {
|
|
1506
|
+
if (this.activeMaintenanceJobs.has(this._importKey(entityId))) {
|
|
1507
|
+
throw new WikiBusyError("import", entityId);
|
|
1508
|
+
}
|
|
1509
|
+
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
1510
|
+
throw new WikiBusyError("librarian", entityId);
|
|
1511
|
+
}
|
|
1512
|
+
if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
1513
|
+
throw new WikiBusyError("heal", entityId);
|
|
1514
|
+
}
|
|
1515
|
+
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
1516
|
+
throw new WikiBusyError("prune", entityId);
|
|
1517
|
+
}
|
|
1518
|
+
if (this._isReembedActive(entityId)) {
|
|
1519
|
+
throw new WikiBusyError("reembed", entityId);
|
|
1520
|
+
}
|
|
1521
|
+
if (this._isIngestActiveFor(entityId)) {
|
|
1522
|
+
throw new WikiBusyError("ingest", entityId);
|
|
1523
|
+
}
|
|
1524
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1525
|
+
throw new WikiBusyError("forget", entityId);
|
|
1526
|
+
}
|
|
1527
|
+
}
|
|
1528
|
+
if (this.activeMaintenanceJobs.has(this._globalImportKey())) {
|
|
1529
|
+
throw new WikiBusyError("import", "*");
|
|
1530
|
+
}
|
|
1531
|
+
this.activeMaintenanceJobs.add(this._globalImportKey());
|
|
1532
|
+
for (const entityId of entityIds) {
|
|
1533
|
+
this.activeMaintenanceJobs.add(this._importKey(entityId));
|
|
1534
|
+
}
|
|
1535
|
+
try {
|
|
1536
|
+
for (const [entityId, bundle] of Object.entries(dump.entities)) {
|
|
1537
|
+
await this._doImportEntity(entityId, bundle, merge);
|
|
1538
|
+
}
|
|
1539
|
+
} finally {
|
|
1540
|
+
this.activeMaintenanceJobs.delete(this._globalImportKey());
|
|
1541
|
+
for (const entityId of entityIds) {
|
|
1542
|
+
this.activeMaintenanceJobs.delete(this._importKey(entityId));
|
|
1543
|
+
}
|
|
1544
|
+
}
|
|
1545
|
+
}
|
|
1546
|
+
async _doImportEntity(entityId, bundle, merge) {
|
|
1547
|
+
const upsertedFactIds = /* @__PURE__ */ new Set();
|
|
1548
|
+
const factsWithPreservedBlob = /* @__PURE__ */ new Set();
|
|
1549
|
+
const preservedBlobDims = /* @__PURE__ */ new Set();
|
|
1550
|
+
await this.db.withTransactionAsync(async () => {
|
|
1551
|
+
if (!merge) {
|
|
1552
|
+
const now = Date.now();
|
|
1553
|
+
await this.db.runAsync(
|
|
1554
|
+
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
1555
|
+
[now, now, entityId]
|
|
1556
|
+
);
|
|
1557
|
+
await this.db.runAsync(
|
|
1558
|
+
`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
1559
|
+
[now, now, entityId]
|
|
1560
|
+
);
|
|
1561
|
+
await this.db.runAsync(
|
|
1562
|
+
`DELETE FROM ${this.prefix}checkpoints WHERE entity_id = ?`,
|
|
1563
|
+
[entityId]
|
|
1564
|
+
);
|
|
1565
|
+
}
|
|
1566
|
+
const factIds = bundle.facts.map((fact) => fact.id);
|
|
1567
|
+
const existingFactsById = /* @__PURE__ */ new Map();
|
|
1568
|
+
const factLookupChunkSize = 500;
|
|
1569
|
+
for (let i = 0; i < factIds.length; i += factLookupChunkSize) {
|
|
1570
|
+
const factIdChunk = factIds.slice(i, i + factLookupChunkSize);
|
|
1571
|
+
if (factIdChunk.length === 0) continue;
|
|
1572
|
+
const placeholders = factIdChunk.map(() => "?").join(", ");
|
|
1573
|
+
const existingFacts = await this.db.getAllAsync(
|
|
1574
|
+
`SELECT id, entity_id, updated_at FROM ${this.prefix}entries WHERE id IN (${placeholders})`,
|
|
1575
|
+
factIdChunk
|
|
1576
|
+
);
|
|
1577
|
+
for (const existingFact of existingFacts) {
|
|
1578
|
+
existingFactsById.set(existingFact.id, existingFact);
|
|
1246
1579
|
}
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
const
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1580
|
+
}
|
|
1581
|
+
for (const fact of bundle.facts) {
|
|
1582
|
+
const tagsJson = JSON.stringify(Array.isArray(fact.tags) ? fact.tags : []);
|
|
1583
|
+
const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
|
|
1584
|
+
const existing = existingFactsById.get(fact.id);
|
|
1585
|
+
const rawBlobRaw = fact.embedding_blob;
|
|
1586
|
+
let rawBlob = null;
|
|
1587
|
+
if (rawBlobRaw instanceof Uint8Array) {
|
|
1588
|
+
rawBlob = rawBlobRaw;
|
|
1589
|
+
} else if (rawBlobRaw !== null && rawBlobRaw !== void 0 && typeof rawBlobRaw === "object") {
|
|
1590
|
+
const obj = rawBlobRaw;
|
|
1591
|
+
if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
|
|
1592
|
+
rawBlob = new Uint8Array(obj["data"]);
|
|
1593
|
+
} else if (!Array.isArray(rawBlobRaw)) {
|
|
1594
|
+
const entries = Object.keys(obj);
|
|
1595
|
+
if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
|
|
1596
|
+
const len = entries.length;
|
|
1597
|
+
rawBlob = new Uint8Array(len);
|
|
1598
|
+
for (let i = 0; i < len; i++) rawBlob[i] = obj[String(i)] ?? 0;
|
|
1599
|
+
}
|
|
1260
1600
|
}
|
|
1261
1601
|
}
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
const
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
if (safeUpdatedAt <= existing.updated_at) continue;
|
|
1602
|
+
let blobData = null;
|
|
1603
|
+
if (rawBlob !== null && rawBlob.byteLength > 0 && rawBlob.byteLength % 4 === 0) {
|
|
1604
|
+
const copy = new ArrayBuffer(rawBlob.byteLength);
|
|
1605
|
+
new Uint8Array(copy).set(rawBlob);
|
|
1606
|
+
const floats = new Float32Array(copy, 0, rawBlob.byteLength / 4);
|
|
1607
|
+
let allFinite = true;
|
|
1608
|
+
for (let i = 0; i < floats.length; i++) {
|
|
1609
|
+
if (!isFinite(floats[i])) {
|
|
1610
|
+
allFinite = false;
|
|
1611
|
+
break;
|
|
1273
1612
|
}
|
|
1613
|
+
}
|
|
1614
|
+
if (allFinite) {
|
|
1615
|
+
blobData = rawBlob;
|
|
1616
|
+
}
|
|
1617
|
+
}
|
|
1618
|
+
if (existing) {
|
|
1619
|
+
if (existing.entity_id !== entityId) {
|
|
1620
|
+
this._warnCrossEntityCollision("entry", fact.id, existing.entity_id, entityId);
|
|
1621
|
+
continue;
|
|
1622
|
+
}
|
|
1623
|
+
if (merge) {
|
|
1624
|
+
if (safeUpdatedAt <= existing.updated_at) continue;
|
|
1625
|
+
}
|
|
1626
|
+
if (blobData != null) {
|
|
1627
|
+
await this.db.runAsync(
|
|
1628
|
+
`UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ?, embedding_blob = ?, embedding = NULL WHERE id = ?`,
|
|
1629
|
+
[entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData, fact.id]
|
|
1630
|
+
);
|
|
1631
|
+
factsWithPreservedBlob.add(fact.id);
|
|
1632
|
+
if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
|
|
1633
|
+
} else {
|
|
1274
1634
|
await this.db.runAsync(
|
|
1275
|
-
`UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at =
|
|
1635
|
+
`UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ?, embedding_blob = NULL, embedding = NULL WHERE id = ?`,
|
|
1276
1636
|
[entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, fact.id]
|
|
1277
1637
|
);
|
|
1278
|
-
|
|
1638
|
+
}
|
|
1639
|
+
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1640
|
+
upsertedFactIds.add(fact.id);
|
|
1641
|
+
} else {
|
|
1642
|
+
if (blobData != null) {
|
|
1643
|
+
await this.db.runAsync(
|
|
1644
|
+
`INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at, embedding_blob) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
1645
|
+
[fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData]
|
|
1646
|
+
);
|
|
1647
|
+
factsWithPreservedBlob.add(fact.id);
|
|
1648
|
+
if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
|
|
1279
1649
|
} else {
|
|
1280
1650
|
await this.db.runAsync(
|
|
1281
1651
|
`INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
1282
1652
|
[fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at]
|
|
1283
1653
|
);
|
|
1284
|
-
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1285
1654
|
}
|
|
1655
|
+
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1656
|
+
upsertedFactIds.add(fact.id);
|
|
1286
1657
|
}
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1658
|
+
}
|
|
1659
|
+
const taskIds = bundle.tasks.map((task) => task.id);
|
|
1660
|
+
const existingTasksById = /* @__PURE__ */ new Map();
|
|
1661
|
+
const taskLookupChunkSize = 500;
|
|
1662
|
+
for (let i = 0; i < taskIds.length; i += taskLookupChunkSize) {
|
|
1663
|
+
const taskIdChunk = taskIds.slice(i, i + taskLookupChunkSize);
|
|
1664
|
+
if (taskIdChunk.length === 0) continue;
|
|
1665
|
+
const placeholders = taskIdChunk.map(() => "?").join(", ");
|
|
1666
|
+
const existingTasks = await this.db.getAllAsync(
|
|
1667
|
+
`SELECT id, entity_id, updated_at FROM ${this.prefix}tasks WHERE id IN (${placeholders})`,
|
|
1668
|
+
taskIdChunk
|
|
1669
|
+
);
|
|
1670
|
+
for (const existingTask of existingTasks) {
|
|
1671
|
+
existingTasksById.set(existingTask.id, existingTask);
|
|
1672
|
+
}
|
|
1673
|
+
}
|
|
1674
|
+
for (const task of bundle.tasks) {
|
|
1675
|
+
const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
|
|
1676
|
+
const existing = existingTasksById.get(task.id);
|
|
1677
|
+
if (existing) {
|
|
1678
|
+
if (existing.entity_id !== entityId) {
|
|
1679
|
+
this._warnCrossEntityCollision("task", task.id, existing.entity_id, entityId);
|
|
1680
|
+
continue;
|
|
1681
|
+
}
|
|
1682
|
+
if (merge) {
|
|
1683
|
+
if (safeUpdatedAt <= existing.updated_at) continue;
|
|
1300
1684
|
}
|
|
1685
|
+
await this.db.runAsync(
|
|
1686
|
+
`UPDATE ${this.prefix}tasks SET entity_id = ?, description = ?, status = ?, priority = ?, created_at = ?, updated_at = ?, resolved_at = ?, deleted_at = ? WHERE id = ?`,
|
|
1687
|
+
[entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at, task.id]
|
|
1688
|
+
);
|
|
1689
|
+
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1690
|
+
} else {
|
|
1691
|
+
await this.db.runAsync(
|
|
1692
|
+
`INSERT INTO ${this.prefix}tasks (id, entity_id, description, status, priority, created_at, updated_at, resolved_at, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
1693
|
+
[task.id, entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at]
|
|
1694
|
+
);
|
|
1695
|
+
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1301
1696
|
}
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1697
|
+
}
|
|
1698
|
+
for (const event of bundle.events) {
|
|
1699
|
+
await this.db.runAsync(
|
|
1700
|
+
`INSERT OR IGNORE INTO ${this.prefix}events (id, entity_id, event_type, summary, related_entry_id, created_at)
|
|
1701
|
+
VALUES (?, ?, ?, ?, ?, ?)`,
|
|
1702
|
+
[event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
|
|
1703
|
+
);
|
|
1704
|
+
}
|
|
1705
|
+
});
|
|
1706
|
+
this.vectorCache.delete(entityId);
|
|
1707
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1708
|
+
for (const fact of bundle.facts) {
|
|
1709
|
+
if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
|
|
1710
|
+
await this.embedFact({
|
|
1711
|
+
id: fact.id,
|
|
1712
|
+
title: fact.title,
|
|
1713
|
+
body: fact.body,
|
|
1714
|
+
tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
|
|
1715
|
+
});
|
|
1716
|
+
}
|
|
1717
|
+
}
|
|
1718
|
+
try {
|
|
1719
|
+
const canonicalRow = await this.db.getFirstAsync(
|
|
1720
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension'`
|
|
1721
|
+
);
|
|
1722
|
+
const canonicalDim = canonicalRow ? parseInt(canonicalRow.value, 10) : null;
|
|
1723
|
+
if (preservedBlobDims.size === 1) {
|
|
1724
|
+
const preservedDim = [...preservedBlobDims][0];
|
|
1725
|
+
if (canonicalDim === null || canonicalDim === preservedDim) {
|
|
1726
|
+
await this.storeEmbeddingDimension(preservedDim);
|
|
1727
|
+
const staleMismatch = await this.db.getFirstAsync(
|
|
1728
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
1729
|
+
);
|
|
1730
|
+
if (staleMismatch && parseInt(staleMismatch.value, 10) !== preservedDim) {
|
|
1319
1731
|
await this.db.runAsync(
|
|
1320
|
-
`INSERT INTO ${this.prefix}
|
|
1321
|
-
[
|
|
1732
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
1733
|
+
[String(preservedDim)]
|
|
1322
1734
|
);
|
|
1323
|
-
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1324
1735
|
}
|
|
1325
|
-
|
|
1326
|
-
|
|
1736
|
+
await this._reconcileEmbeddingDimension();
|
|
1737
|
+
} else {
|
|
1327
1738
|
await this.db.runAsync(
|
|
1328
|
-
`INSERT OR
|
|
1329
|
-
|
|
1330
|
-
[event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
|
|
1739
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
1740
|
+
[String(canonicalDim)]
|
|
1331
1741
|
);
|
|
1332
1742
|
}
|
|
1333
|
-
})
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
await this.
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1743
|
+
} else if (preservedBlobDims.size > 1) {
|
|
1744
|
+
if (canonicalDim === null) {
|
|
1745
|
+
const sortedPreservedBlobDims = [...preservedBlobDims].sort((a, b) => a - b);
|
|
1746
|
+
await this.storeEmbeddingDimension(sortedPreservedBlobDims[0]);
|
|
1747
|
+
await this.db.runAsync(
|
|
1748
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
1749
|
+
[String(sortedPreservedBlobDims[0])]
|
|
1750
|
+
);
|
|
1751
|
+
} else {
|
|
1752
|
+
await this.db.runAsync(
|
|
1753
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
1754
|
+
[String(canonicalDim)]
|
|
1755
|
+
);
|
|
1342
1756
|
}
|
|
1343
1757
|
}
|
|
1758
|
+
} finally {
|
|
1759
|
+
this.vectorCache.delete(entityId);
|
|
1344
1760
|
}
|
|
1345
|
-
await this.rebuildMiniSearchIndex();
|
|
1346
1761
|
}
|
|
1347
1762
|
async forget(entityId, params) {
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
if (
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
} else {
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
const
|
|
1371
|
-
let
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
const
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1763
|
+
let blockingOperation = null;
|
|
1764
|
+
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
1765
|
+
blockingOperation = "librarian";
|
|
1766
|
+
} else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
1767
|
+
blockingOperation = "heal";
|
|
1768
|
+
} else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
1769
|
+
blockingOperation = "prune";
|
|
1770
|
+
} else if (this._isReembedActive(entityId)) {
|
|
1771
|
+
blockingOperation = "reembed";
|
|
1772
|
+
} else if (this._isIngestActiveFor(entityId)) {
|
|
1773
|
+
blockingOperation = "ingest";
|
|
1774
|
+
} else if (this._isImportActiveFor(entityId)) {
|
|
1775
|
+
blockingOperation = "import";
|
|
1776
|
+
} else if (this._isForgetActiveFor(entityId)) {
|
|
1777
|
+
blockingOperation = "forget";
|
|
1778
|
+
}
|
|
1779
|
+
if (blockingOperation !== null) {
|
|
1780
|
+
throw new WikiBusyError(blockingOperation, entityId);
|
|
1781
|
+
}
|
|
1782
|
+
const forgetKey = this._forgetKey(entityId);
|
|
1783
|
+
this.activeMaintenanceJobs.add(forgetKey);
|
|
1784
|
+
try {
|
|
1785
|
+
const now = Date.now();
|
|
1786
|
+
let deletedEntries = 0;
|
|
1787
|
+
let deletedTasks = 0;
|
|
1788
|
+
if (params.clearAll) {
|
|
1789
|
+
const [entriesRes, tasksRes] = await Promise.all([
|
|
1790
|
+
this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId]),
|
|
1791
|
+
this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId])
|
|
1792
|
+
]);
|
|
1793
|
+
await this.db.runAsync(`UPDATE ${this.prefix}checkpoints SET memory_checkpoint = 0, heal_checkpoint = 0 WHERE entity_id = ?`, [entityId]);
|
|
1794
|
+
deletedEntries = entriesRes.changes;
|
|
1795
|
+
deletedTasks = tasksRes.changes;
|
|
1796
|
+
} else {
|
|
1797
|
+
const hasIdSelectors = params.entryId !== void 0 || params.taskId !== void 0;
|
|
1798
|
+
const hasSourceSelectors = params.sourceRef !== void 0 || params.sourceHash !== void 0;
|
|
1799
|
+
if (hasIdSelectors && hasSourceSelectors) {
|
|
1800
|
+
throw new Error("forget() params are mutually exclusive: use entryId/taskId together, or sourceRef/sourceHash together, but not both in the same call");
|
|
1378
1801
|
}
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1802
|
+
const sourceRef = params.sourceRef !== void 0 ? normalizeSourceRef(params.sourceRef) : null;
|
|
1803
|
+
if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
|
|
1804
|
+
const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
|
|
1805
|
+
if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
|
|
1806
|
+
const entryPromise = params.entryId ? this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.entryId, entityId]) : null;
|
|
1807
|
+
const taskPromise = params.taskId ? this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.taskId, entityId]) : null;
|
|
1808
|
+
let refPromise = null;
|
|
1809
|
+
if (sourceRef || sourceHash) {
|
|
1810
|
+
let q = `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`;
|
|
1811
|
+
const args = [now, now, entityId];
|
|
1812
|
+
if (sourceRef) {
|
|
1813
|
+
q += ` AND source_ref = ?`;
|
|
1814
|
+
args.push(sourceRef);
|
|
1815
|
+
}
|
|
1816
|
+
if (sourceHash) {
|
|
1817
|
+
q += ` AND source_hash = ?`;
|
|
1818
|
+
args.push(sourceHash);
|
|
1819
|
+
}
|
|
1820
|
+
refPromise = this.db.runAsync(q, args);
|
|
1382
1821
|
}
|
|
1383
|
-
|
|
1822
|
+
const [entryResult, taskResult, refResult] = await Promise.all([
|
|
1823
|
+
entryPromise ?? Promise.resolve(null),
|
|
1824
|
+
taskPromise ?? Promise.resolve(null),
|
|
1825
|
+
refPromise ?? Promise.resolve(null)
|
|
1826
|
+
]);
|
|
1827
|
+
if (entryResult) deletedEntries += entryResult.changes;
|
|
1828
|
+
if (taskResult) deletedTasks += taskResult.changes;
|
|
1829
|
+
if (refResult) deletedEntries += refResult.changes;
|
|
1384
1830
|
}
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
if (entryResult) deletedEntries += entryResult.changes;
|
|
1391
|
-
if (taskResult) deletedTasks += taskResult.changes;
|
|
1392
|
-
if (refResult) deletedEntries += refResult.changes;
|
|
1831
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1832
|
+
this.vectorCache.delete(entityId);
|
|
1833
|
+
return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
|
|
1834
|
+
} finally {
|
|
1835
|
+
this.activeMaintenanceJobs.delete(forgetKey);
|
|
1393
1836
|
}
|
|
1394
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
1395
|
-
return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
|
|
1396
1837
|
}
|
|
1397
1838
|
async ingestDocument(entityId, params) {
|
|
1398
1839
|
const sourceRef = normalizeSourceRef(params.sourceRef);
|
|
@@ -1420,6 +1861,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1420
1861
|
if (this._isReembedActive(entityId)) {
|
|
1421
1862
|
throw new WikiBusyError("reembed", entityId);
|
|
1422
1863
|
}
|
|
1864
|
+
if (this._isImportActiveFor(entityId)) {
|
|
1865
|
+
throw new WikiBusyError("import", entityId);
|
|
1866
|
+
}
|
|
1867
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1868
|
+
throw new WikiBusyError("forget", entityId);
|
|
1869
|
+
}
|
|
1423
1870
|
this.activeIngestJobs.add(jobKey);
|
|
1424
1871
|
try {
|
|
1425
1872
|
const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
|
|
@@ -1467,16 +1914,31 @@ ${chunk}`;
|
|
|
1467
1914
|
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1468
1915
|
}
|
|
1469
1916
|
});
|
|
1917
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1918
|
+
this.vectorCache.delete(entityId);
|
|
1470
1919
|
for (const fact of insertedFacts) {
|
|
1471
1920
|
await this.embedFact(fact);
|
|
1472
1921
|
}
|
|
1473
|
-
|
|
1922
|
+
this.vectorCache.delete(entityId);
|
|
1474
1923
|
return { truncated, chunks: chunks.length };
|
|
1475
1924
|
} finally {
|
|
1476
1925
|
this.activeIngestJobs.delete(jobKey);
|
|
1477
1926
|
}
|
|
1478
1927
|
}
|
|
1479
1928
|
};
|
|
1929
|
+
/**
|
|
1930
|
+
* Maximum number of entities whose parsed embedding vectors are held in
|
|
1931
|
+
* memory. This cap is intentionally conservative so the cache remains safe
|
|
1932
|
+
* on memory-constrained runtimes (e.g., mobile/Expo).
|
|
1933
|
+
*/
|
|
1934
|
+
_WikiMemory.MAX_VECTOR_CACHE_ENTITIES = 16;
|
|
1935
|
+
/**
|
|
1936
|
+
* Maximum number of fact vectors cached per entity. Keep this high enough to
|
|
1937
|
+
* preserve the parsed-embedding reuse optimization for common mid-sized
|
|
1938
|
+
* entities while still maintaining a bounded memory footprint.
|
|
1939
|
+
*/
|
|
1940
|
+
_WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY = 500;
|
|
1941
|
+
var WikiMemory = _WikiMemory;
|
|
1480
1942
|
|
|
1481
1943
|
// src/utils/formatContext.ts
|
|
1482
1944
|
function validateMaxOption(value, name) {
|
|
@@ -1672,8 +2134,23 @@ function formatMemoryDump(dump) {
|
|
|
1672
2134
|
name: formatEntityFileName(entityId),
|
|
1673
2135
|
content: renderEntity(entityId, bundle, dump.generatedAt)
|
|
1674
2136
|
}));
|
|
2137
|
+
const manifestDump = {
|
|
2138
|
+
generatedAt: dump.generatedAt,
|
|
2139
|
+
entities: Object.fromEntries(
|
|
2140
|
+
Object.entries(dump.entities).map(([entityId, bundle]) => [
|
|
2141
|
+
entityId,
|
|
2142
|
+
{
|
|
2143
|
+
...bundle,
|
|
2144
|
+
facts: bundle.facts.map((f) => {
|
|
2145
|
+
const { embedding_blob: _blob, ...rest } = f;
|
|
2146
|
+
return rest;
|
|
2147
|
+
})
|
|
2148
|
+
}
|
|
2149
|
+
])
|
|
2150
|
+
)
|
|
2151
|
+
};
|
|
1675
2152
|
return {
|
|
1676
|
-
manifest: JSON.stringify(
|
|
2153
|
+
manifest: JSON.stringify(manifestDump, null, 2),
|
|
1677
2154
|
files
|
|
1678
2155
|
};
|
|
1679
2156
|
}
|