@equationalapplications/core-llm-wiki 2.6.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +171 -1
- package/dist/index.d.mts +77 -5
- package/dist/index.d.ts +77 -5
- package/dist/index.js +696 -219
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +696 -219
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -18,7 +18,8 @@ async function setupDatabase(db, prefix) {
|
|
|
18
18
|
last_accessed_at INTEGER,
|
|
19
19
|
access_count INTEGER NOT NULL DEFAULT 0,
|
|
20
20
|
deleted_at INTEGER,
|
|
21
|
-
embedding TEXT
|
|
21
|
+
embedding TEXT,
|
|
22
|
+
embedding_blob BLOB
|
|
22
23
|
);
|
|
23
24
|
|
|
24
25
|
CREATE INDEX IF NOT EXISTS ${prefix}entries_entity_idx ON ${prefix}entries(entity_id);
|
|
@@ -91,6 +92,20 @@ var MIGRATIONS = [
|
|
|
91
92
|
await db.execAsync(`ALTER TABLE ${prefix}entries ADD COLUMN embedding TEXT`);
|
|
92
93
|
}
|
|
93
94
|
}
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
version: 3,
|
|
98
|
+
description: "Add embedding_blob BLOB column for Float32Array vector storage",
|
|
99
|
+
run: async (db, prefix) => {
|
|
100
|
+
const cols = await db.getAllAsync(
|
|
101
|
+
`PRAGMA table_info(${prefix}entries)`
|
|
102
|
+
);
|
|
103
|
+
if (!cols.some((c) => c.name === "embedding_blob")) {
|
|
104
|
+
await db.execAsync(
|
|
105
|
+
`ALTER TABLE ${prefix}entries ADD COLUMN embedding_blob BLOB`
|
|
106
|
+
);
|
|
107
|
+
}
|
|
108
|
+
}
|
|
94
109
|
}
|
|
95
110
|
];
|
|
96
111
|
for (let i = 1; i < MIGRATIONS.length; i++) {
|
|
@@ -148,6 +163,34 @@ function cosineSimilarity(a, b) {
|
|
|
148
163
|
return denom === 0 ? 0 : dot / denom;
|
|
149
164
|
}
|
|
150
165
|
|
|
166
|
+
// src/utils/embedding.ts
|
|
167
|
+
function parseEmbedding(blob, text) {
|
|
168
|
+
if (blob && blob.byteLength > 0) {
|
|
169
|
+
if (blob.byteLength % 4 !== 0) return null;
|
|
170
|
+
const copy = new ArrayBuffer(blob.byteLength);
|
|
171
|
+
new Uint8Array(copy).set(blob);
|
|
172
|
+
const vector = new Float32Array(copy);
|
|
173
|
+
for (const value of vector) {
|
|
174
|
+
if (!Number.isFinite(value)) return null;
|
|
175
|
+
}
|
|
176
|
+
return vector;
|
|
177
|
+
}
|
|
178
|
+
if (text) {
|
|
179
|
+
try {
|
|
180
|
+
const arr = JSON.parse(text);
|
|
181
|
+
if (!Array.isArray(arr) || !arr.every((v) => typeof v === "number" && isFinite(v))) return null;
|
|
182
|
+
const vector = new Float32Array(arr);
|
|
183
|
+
for (const value of vector) {
|
|
184
|
+
if (!Number.isFinite(value)) return null;
|
|
185
|
+
}
|
|
186
|
+
return vector;
|
|
187
|
+
} catch {
|
|
188
|
+
return null;
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
return null;
|
|
192
|
+
}
|
|
193
|
+
|
|
151
194
|
// src/WikiMemory.ts
|
|
152
195
|
function parseJsonResponse(text) {
|
|
153
196
|
const firstBrace = text.indexOf("{");
|
|
@@ -352,7 +395,7 @@ function jaccardScore(a, b) {
|
|
|
352
395
|
}
|
|
353
396
|
var FUZZY_THRESHOLD = 0.5;
|
|
354
397
|
var MIN_TOKENS_TO_QUALIFY = 3;
|
|
355
|
-
var
|
|
398
|
+
var _WikiMemory = class _WikiMemory {
|
|
356
399
|
constructor(db, options) {
|
|
357
400
|
this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
|
|
358
401
|
this.activeIngestJobs = /* @__PURE__ */ new Set();
|
|
@@ -366,6 +409,7 @@ var WikiMemory = class {
|
|
|
366
409
|
}
|
|
367
410
|
});
|
|
368
411
|
this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
|
|
412
|
+
this.vectorCache = /* @__PURE__ */ new Map();
|
|
369
413
|
this.db = db;
|
|
370
414
|
this.options = options;
|
|
371
415
|
this.prefix = options.config?.tablePrefix || "llm_wiki_";
|
|
@@ -432,10 +476,6 @@ var WikiMemory = class {
|
|
|
432
476
|
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
433
477
|
[String(dim)]
|
|
434
478
|
);
|
|
435
|
-
} else {
|
|
436
|
-
await this.db.runAsync(
|
|
437
|
-
`DELETE FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
438
|
-
);
|
|
439
479
|
}
|
|
440
480
|
} else {
|
|
441
481
|
await this.db.runAsync(
|
|
@@ -454,7 +494,18 @@ var WikiMemory = class {
|
|
|
454
494
|
const mismatch = await this.db.getFirstAsync(
|
|
455
495
|
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
456
496
|
);
|
|
457
|
-
if (mismatch)
|
|
497
|
+
if (!mismatch) return;
|
|
498
|
+
const newDim = parseInt(mismatch.value, 10);
|
|
499
|
+
const residual = await this.db.getFirstAsync(
|
|
500
|
+
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
501
|
+
WHERE deleted_at IS NULL
|
|
502
|
+
AND (
|
|
503
|
+
(embedding_blob IS NOT NULL AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?)
|
|
504
|
+
OR (embedding_blob IS NULL AND embedding IS NOT NULL)
|
|
505
|
+
)`,
|
|
506
|
+
[newDim]
|
|
507
|
+
);
|
|
508
|
+
if (!residual || residual.cnt === 0) {
|
|
458
509
|
await this.db.runAsync(
|
|
459
510
|
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension', ?)`,
|
|
460
511
|
[mismatch.value]
|
|
@@ -485,10 +536,23 @@ var WikiMemory = class {
|
|
|
485
536
|
console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
|
|
486
537
|
return false;
|
|
487
538
|
}
|
|
488
|
-
|
|
539
|
+
const float32Vector = new Float32Array(vector);
|
|
540
|
+
let hasNonFinite = false;
|
|
541
|
+
for (let i = 0; i < float32Vector.length; i++) {
|
|
542
|
+
if (!isFinite(float32Vector[i])) {
|
|
543
|
+
hasNonFinite = true;
|
|
544
|
+
break;
|
|
545
|
+
}
|
|
546
|
+
}
|
|
547
|
+
if (hasNonFinite) {
|
|
548
|
+
console.warn(`[WikiMemory] embedFact: embed() returned values that overflow float32 for ${fact.id}; skipping.`);
|
|
549
|
+
return false;
|
|
550
|
+
}
|
|
551
|
+
await this.storeEmbeddingDimension(float32Vector.length);
|
|
552
|
+
const blob = new Uint8Array(float32Vector.buffer);
|
|
489
553
|
await this.db.runAsync(
|
|
490
|
-
`UPDATE ${this.prefix}entries SET embedding =
|
|
491
|
-
[
|
|
554
|
+
`UPDATE ${this.prefix}entries SET embedding_blob = ?, embedding = NULL WHERE id = ?`,
|
|
555
|
+
[blob, fact.id]
|
|
492
556
|
);
|
|
493
557
|
return true;
|
|
494
558
|
} catch (err) {
|
|
@@ -608,9 +672,24 @@ var WikiMemory = class {
|
|
|
608
672
|
_globalReembedKey() {
|
|
609
673
|
return `${this.prefix}:reembed`;
|
|
610
674
|
}
|
|
675
|
+
_importKey(entityId) {
|
|
676
|
+
return `${this.prefix}:${entityId}:import`;
|
|
677
|
+
}
|
|
678
|
+
_globalImportKey() {
|
|
679
|
+
return `${this.prefix}:import`;
|
|
680
|
+
}
|
|
681
|
+
_forgetKey(entityId) {
|
|
682
|
+
return `${this.prefix}:${entityId}:forget`;
|
|
683
|
+
}
|
|
611
684
|
_isReembedActive(entityId) {
|
|
612
685
|
return this.activeMaintenanceJobs.has(this._reembedKey(entityId)) || this.activeMaintenanceJobs.has(this._globalReembedKey());
|
|
613
686
|
}
|
|
687
|
+
_isImportActiveFor(entityId) {
|
|
688
|
+
return this.activeMaintenanceJobs.has(this._importKey(entityId)) || this.activeMaintenanceJobs.has(this._globalImportKey());
|
|
689
|
+
}
|
|
690
|
+
_isForgetActiveFor(entityId) {
|
|
691
|
+
return this.activeMaintenanceJobs.has(this._forgetKey(entityId));
|
|
692
|
+
}
|
|
614
693
|
/** Returns true if any maintenance job has the given operation suffix (e.g. ':prune'). */
|
|
615
694
|
_isAnyMaintenanceActiveWithSuffix(suffix) {
|
|
616
695
|
const entityKeyPrefix = `${this.prefix}:`;
|
|
@@ -653,6 +732,10 @@ var WikiMemory = class {
|
|
|
653
732
|
blockingOperation = "reembed";
|
|
654
733
|
} else if (isIngestRunning) {
|
|
655
734
|
blockingOperation = "ingest";
|
|
735
|
+
} else if (this._isImportActiveFor(entityId)) {
|
|
736
|
+
blockingOperation = "import";
|
|
737
|
+
} else if (this._isForgetActiveFor(entityId)) {
|
|
738
|
+
blockingOperation = "forget";
|
|
656
739
|
}
|
|
657
740
|
if (blockingOperation !== null) {
|
|
658
741
|
throw new WikiBusyError(blockingOperation, entityId);
|
|
@@ -697,19 +780,27 @@ var WikiMemory = class {
|
|
|
697
780
|
await this.db.execAsync(`VACUUM`);
|
|
698
781
|
}
|
|
699
782
|
await this.rebuildMiniSearchIndex(entityId);
|
|
783
|
+
this.vectorCache.delete(entityId);
|
|
700
784
|
return { entries: deletedEntries, tasks: deletedTasks, events: deletedEvents };
|
|
701
785
|
} finally {
|
|
702
786
|
this.activeMaintenanceJobs.delete(pruneKey);
|
|
703
787
|
}
|
|
704
788
|
}
|
|
705
|
-
async read(entityId, query) {
|
|
706
|
-
const
|
|
789
|
+
async read(entityId, query, options) {
|
|
790
|
+
const config = this.options.config;
|
|
791
|
+
const rawMaxResults = options?.maxResults ?? config?.maxResults ?? config?.maxFtsResults ?? 10;
|
|
792
|
+
const maxResults = Number.isFinite(rawMaxResults) ? Math.max(0, Math.trunc(rawMaxResults)) : 10;
|
|
793
|
+
const rawPreFilterLimit = options?.preFilterLimit === null ? void 0 : options?.preFilterLimit ?? config?.preFilterLimit;
|
|
794
|
+
const effectivePreFilterLimit = rawPreFilterLimit === void 0 ? void 0 : Number.isFinite(rawPreFilterLimit) ? Math.max(0, Math.trunc(rawPreFilterLimit)) : void 0;
|
|
795
|
+
const hybridWeight = options?.hybridWeight ?? config?.hybridWeight;
|
|
796
|
+
const weight = hybridWeight !== void 0 && !Number.isNaN(hybridWeight) ? Math.max(0, Math.min(1, hybridWeight)) : void 0;
|
|
797
|
+
const skipEmbed = weight === 0;
|
|
707
798
|
const embedFn = this.options.llmProvider.embed;
|
|
708
799
|
const trimmedQuery = query.trim();
|
|
709
800
|
let facts = [];
|
|
710
|
-
if (trimmedQuery) {
|
|
801
|
+
if (maxResults === 0) ; else if (trimmedQuery) {
|
|
711
802
|
let usedEmbed = false;
|
|
712
|
-
if (embedFn) {
|
|
803
|
+
if (!skipEmbed && embedFn) {
|
|
713
804
|
try {
|
|
714
805
|
const queryVec = await embedFn(trimmedQuery);
|
|
715
806
|
if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
|
|
@@ -728,49 +819,138 @@ var WikiMemory = class {
|
|
|
728
819
|
);
|
|
729
820
|
}
|
|
730
821
|
}
|
|
731
|
-
const
|
|
732
|
-
`SELECT
|
|
733
|
-
|
|
822
|
+
const mismatchedCount = await this.db.getFirstAsync(
|
|
823
|
+
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
824
|
+
WHERE entity_id = ? AND deleted_at IS NULL
|
|
825
|
+
AND embedding_blob IS NOT NULL
|
|
826
|
+
AND (CAST(length(embedding_blob) AS INTEGER) % 4 = 0)
|
|
827
|
+
AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?`,
|
|
828
|
+
[entityId, queryVec.length]
|
|
734
829
|
);
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
830
|
+
if (mismatchedCount && mismatchedCount.cnt > 0) {
|
|
831
|
+
throw new Error(
|
|
832
|
+
`Some facts have embeddings that do not match the current model dimension. Call runReembed() to rebuild all embeddings consistently.`
|
|
833
|
+
);
|
|
834
|
+
}
|
|
835
|
+
let candidateRows;
|
|
836
|
+
let populateCache = true;
|
|
837
|
+
let miniSearchScores;
|
|
838
|
+
if (effectivePreFilterLimit !== void 0) {
|
|
839
|
+
populateCache = false;
|
|
840
|
+
const preResults = this.miniSearch.search(trimmedQuery, {
|
|
841
|
+
filter: (r) => r.entity_id === entityId,
|
|
842
|
+
combineWith: "OR"
|
|
843
|
+
});
|
|
844
|
+
if (preResults.length === 0) {
|
|
845
|
+
candidateRows = null;
|
|
846
|
+
} else {
|
|
847
|
+
const topKResults = preResults.slice(0, effectivePreFilterLimit);
|
|
848
|
+
if (topKResults.length === 0) {
|
|
849
|
+
candidateRows = null;
|
|
850
|
+
} else {
|
|
851
|
+
const topKIds = topKResults.map((r) => r.id);
|
|
852
|
+
const inClauseChunkSize = 500;
|
|
853
|
+
candidateRows = [];
|
|
854
|
+
for (let i = 0; i < topKIds.length; i += inClauseChunkSize) {
|
|
855
|
+
const idChunk = topKIds.slice(i, i + inClauseChunkSize);
|
|
856
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
857
|
+
const chunkRows = await this.db.getAllAsync(
|
|
858
|
+
`SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
859
|
+
idChunk
|
|
860
|
+
);
|
|
861
|
+
candidateRows.push(...chunkRows);
|
|
862
|
+
}
|
|
863
|
+
if (weight !== void 0 && weight < 1) {
|
|
864
|
+
const maxMsScore = Math.max(1, topKResults[0]?.score ?? 1);
|
|
865
|
+
miniSearchScores = new Map(topKResults.map((r) => [r.id, r.score / maxMsScore]));
|
|
742
866
|
}
|
|
743
|
-
} catch {
|
|
744
867
|
}
|
|
745
868
|
}
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
869
|
+
} else {
|
|
870
|
+
candidateRows = await this.db.getAllAsync(
|
|
871
|
+
`SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
872
|
+
[entityId]
|
|
873
|
+
);
|
|
874
|
+
if (weight !== void 0 && weight < 1) {
|
|
875
|
+
const msResults = this.miniSearch.search(trimmedQuery, {
|
|
876
|
+
filter: (r) => r.entity_id === entityId,
|
|
877
|
+
combineWith: "OR"
|
|
878
|
+
});
|
|
879
|
+
const maxMsScore = Math.max(1, msResults[0]?.score ?? 1);
|
|
880
|
+
miniSearchScores = new Map(msResults.map((r) => [r.id, r.score / maxMsScore]));
|
|
752
881
|
}
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
882
|
+
}
|
|
883
|
+
if (candidateRows === null) {
|
|
884
|
+
usedEmbed = true;
|
|
885
|
+
} else {
|
|
886
|
+
let entityCache = this.vectorCache.get(entityId);
|
|
887
|
+
const tooLarge = populateCache && candidateRows.length > _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
|
|
888
|
+
if (tooLarge && entityCache) {
|
|
889
|
+
this.vectorCache.delete(entityId);
|
|
890
|
+
entityCache = void 0;
|
|
756
891
|
}
|
|
757
|
-
const
|
|
758
|
-
if (
|
|
759
|
-
|
|
892
|
+
const canCache = populateCache && !tooLarge;
|
|
893
|
+
if (canCache && !entityCache) {
|
|
894
|
+
entityCache = /* @__PURE__ */ new Map();
|
|
760
895
|
}
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
896
|
+
const scored = candidateRows.map((row) => {
|
|
897
|
+
let vector = entityCache?.get(row.id) ?? parseEmbedding(row.embedding_blob, row.embedding);
|
|
898
|
+
if (vector && canCache && entityCache && !entityCache.has(row.id)) {
|
|
899
|
+
entityCache.set(row.id, vector);
|
|
900
|
+
}
|
|
901
|
+
let score = 0;
|
|
902
|
+
if (vector && vector.length === queryVec.length) {
|
|
903
|
+
const cosSim = cosineSimilarity(queryVec, vector);
|
|
904
|
+
if (weight !== void 0) {
|
|
905
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
906
|
+
score = weight * Math.max(0, cosSim) + (1 - weight) * kwScore;
|
|
907
|
+
} else {
|
|
908
|
+
score = cosSim;
|
|
909
|
+
}
|
|
910
|
+
} else if (weight !== void 0 && weight < 1) {
|
|
911
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
912
|
+
score = (1 - weight) * kwScore;
|
|
913
|
+
} else {
|
|
914
|
+
score = -2;
|
|
915
|
+
}
|
|
916
|
+
return { row, score };
|
|
917
|
+
});
|
|
918
|
+
if (canCache && entityCache && entityCache.size > 0) {
|
|
919
|
+
if (!this.vectorCache.has(entityId)) {
|
|
920
|
+
if (this.vectorCache.size >= _WikiMemory.MAX_VECTOR_CACHE_ENTITIES) {
|
|
921
|
+
const oldestKey = this.vectorCache.keys().next().value;
|
|
922
|
+
if (oldestKey !== void 0) this.vectorCache.delete(oldestKey);
|
|
923
|
+
}
|
|
924
|
+
this.vectorCache.set(entityId, entityCache);
|
|
925
|
+
}
|
|
926
|
+
}
|
|
927
|
+
scored.sort((a, b) => {
|
|
928
|
+
const scoreDiff = b.score - a.score;
|
|
929
|
+
if (scoreDiff !== 0) return scoreDiff;
|
|
930
|
+
const accessCountDiff = (b.row.access_count ?? 0) - (a.row.access_count ?? 0);
|
|
931
|
+
if (accessCountDiff !== 0) return accessCountDiff;
|
|
932
|
+
const updatedAtDiff = (b.row.updated_at ?? 0) - (a.row.updated_at ?? 0);
|
|
933
|
+
if (updatedAtDiff !== 0) return updatedAtDiff;
|
|
934
|
+
return a.row.id.localeCompare(b.row.id);
|
|
935
|
+
});
|
|
936
|
+
const topIds = scored.slice(0, maxResults).map((s) => s.row.id);
|
|
937
|
+
if (topIds.length > 0) {
|
|
938
|
+
const fullRows = [];
|
|
939
|
+
const phase2ChunkSize = 500;
|
|
940
|
+
for (let i = 0; i < topIds.length; i += phase2ChunkSize) {
|
|
941
|
+
const idChunk = topIds.slice(i, i + phase2ChunkSize);
|
|
942
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
943
|
+
const chunkRows = await this.db.getAllAsync(
|
|
944
|
+
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
945
|
+
idChunk
|
|
946
|
+
);
|
|
947
|
+
fullRows.push(...chunkRows);
|
|
948
|
+
}
|
|
949
|
+
const byId = new Map(fullRows.map((r) => [r.id, r]));
|
|
950
|
+
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
951
|
+
}
|
|
952
|
+
usedEmbed = true;
|
|
772
953
|
}
|
|
773
|
-
usedEmbed = true;
|
|
774
954
|
} catch (err) {
|
|
775
955
|
const error = err instanceof Error ? err : new Error(String(err));
|
|
776
956
|
this.options.onRetrievalFallback?.(error);
|
|
@@ -783,25 +963,35 @@ var WikiMemory = class {
|
|
|
783
963
|
});
|
|
784
964
|
const topIds = results.slice(0, maxResults).map((r) => r.id);
|
|
785
965
|
if (topIds.length > 0) {
|
|
786
|
-
const
|
|
787
|
-
const
|
|
788
|
-
|
|
789
|
-
topIds
|
|
790
|
-
|
|
791
|
-
|
|
966
|
+
const kwRows = [];
|
|
967
|
+
const kwChunkSize = 500;
|
|
968
|
+
for (let i = 0; i < topIds.length; i += kwChunkSize) {
|
|
969
|
+
const idChunk = topIds.slice(i, i + kwChunkSize);
|
|
970
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
971
|
+
const chunkRows = await this.db.getAllAsync(
|
|
972
|
+
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
973
|
+
idChunk
|
|
974
|
+
);
|
|
975
|
+
kwRows.push(...chunkRows);
|
|
976
|
+
}
|
|
977
|
+
const byId = new Map(kwRows.map((r) => [r.id, r]));
|
|
792
978
|
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
793
979
|
}
|
|
794
980
|
}
|
|
795
981
|
if (facts.length > 0) {
|
|
796
982
|
const ids = facts.map((f) => f.id);
|
|
797
|
-
const placeholders = ids.map(() => "?").join(",");
|
|
798
983
|
const now = Date.now();
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
984
|
+
const accessChunkSize = 500;
|
|
985
|
+
for (let i = 0; i < ids.length; i += accessChunkSize) {
|
|
986
|
+
const idChunk = ids.slice(i, i + accessChunkSize);
|
|
987
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
988
|
+
await this.db.runAsync(
|
|
989
|
+
`UPDATE ${this.prefix}entries
|
|
990
|
+
SET access_count = access_count + 1, last_accessed_at = ?
|
|
991
|
+
WHERE id IN (${placeholders})`,
|
|
992
|
+
[now, ...idChunk]
|
|
993
|
+
);
|
|
994
|
+
}
|
|
805
995
|
}
|
|
806
996
|
} else {
|
|
807
997
|
facts = await this.db.getAllAsync(
|
|
@@ -828,7 +1018,7 @@ var WikiMemory = class {
|
|
|
828
1018
|
)
|
|
829
1019
|
]);
|
|
830
1020
|
const parsedFacts = facts.map((f) => {
|
|
831
|
-
const { embedding: _embedding, ...rest } = f;
|
|
1021
|
+
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
832
1022
|
return {
|
|
833
1023
|
...rest,
|
|
834
1024
|
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
@@ -860,7 +1050,7 @@ var WikiMemory = class {
|
|
|
860
1050
|
if (memoryCheckpoint > count) memoryCheckpoint = 0;
|
|
861
1051
|
if (count - memoryCheckpoint >= threshold) {
|
|
862
1052
|
const jobKey = this._librarianKey(entityId);
|
|
863
|
-
if (!this.activeMaintenanceJobs.has(jobKey) && !this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
1053
|
+
if (!this.activeMaintenanceJobs.has(jobKey) && !this.activeMaintenanceJobs.has(this._pruneKey(entityId)) && !this._isReembedActive(entityId) && !this._isImportActiveFor(entityId) && !this._isForgetActiveFor(entityId)) {
|
|
864
1054
|
this.activeMaintenanceJobs.add(jobKey);
|
|
865
1055
|
this.runLibrarianThenMaybeHeal(entityId, count).catch(console.error).finally(() => this.activeMaintenanceJobs.delete(jobKey));
|
|
866
1056
|
}
|
|
@@ -908,7 +1098,7 @@ var WikiMemory = class {
|
|
|
908
1098
|
LIMIT 100
|
|
909
1099
|
`, [entityId]);
|
|
910
1100
|
const currentFacts = currentFactsRows.map((f) => {
|
|
911
|
-
const { embedding: _embedding, ...rest } = f;
|
|
1101
|
+
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
912
1102
|
return {
|
|
913
1103
|
...rest,
|
|
914
1104
|
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
@@ -962,10 +1152,12 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
962
1152
|
`, [id, entityId, task.description, "pending", task.priority, now, now]);
|
|
963
1153
|
}
|
|
964
1154
|
});
|
|
1155
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1156
|
+
this.vectorCache.delete(entityId);
|
|
965
1157
|
for (const fact of insertedFacts) {
|
|
966
1158
|
await this.embedFact(fact);
|
|
967
1159
|
}
|
|
968
|
-
|
|
1160
|
+
this.vectorCache.delete(entityId);
|
|
969
1161
|
}
|
|
970
1162
|
async _doRunHeal(entityId) {
|
|
971
1163
|
const now = Date.now();
|
|
@@ -1003,7 +1195,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
1003
1195
|
const documentAnchors = allFactsRows.filter((f) => f.source_type === "user_document").map(({ id, title, source_ref }) => ({ id, title, source_ref }));
|
|
1004
1196
|
const userPrompt = `Heal Candidates:
|
|
1005
1197
|
${JSON.stringify(healCandidates.map((f) => {
|
|
1006
|
-
const { embedding: _embedding, ...rest } = f;
|
|
1198
|
+
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
1007
1199
|
return { ...rest, tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags };
|
|
1008
1200
|
}), null, 2)}
|
|
1009
1201
|
|
|
@@ -1046,10 +1238,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1046
1238
|
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1047
1239
|
}
|
|
1048
1240
|
});
|
|
1241
|
+
this.vectorCache.delete(entityId);
|
|
1242
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1049
1243
|
for (const fact of insertedFacts) {
|
|
1050
1244
|
await this.embedFact(fact);
|
|
1051
1245
|
}
|
|
1052
|
-
|
|
1246
|
+
this.vectorCache.delete(entityId);
|
|
1053
1247
|
}
|
|
1054
1248
|
async runLibrarian(entityId) {
|
|
1055
1249
|
const jobKey = this._librarianKey(entityId);
|
|
@@ -1062,6 +1256,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1062
1256
|
if (this._isReembedActive(entityId)) {
|
|
1063
1257
|
throw new WikiBusyError("reembed", entityId);
|
|
1064
1258
|
}
|
|
1259
|
+
if (this._isImportActiveFor(entityId)) {
|
|
1260
|
+
throw new WikiBusyError("import", entityId);
|
|
1261
|
+
}
|
|
1262
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1263
|
+
throw new WikiBusyError("forget", entityId);
|
|
1264
|
+
}
|
|
1065
1265
|
this.activeMaintenanceJobs.add(jobKey);
|
|
1066
1266
|
try {
|
|
1067
1267
|
await this._doRunLibrarian(entityId);
|
|
@@ -1080,6 +1280,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1080
1280
|
if (this._isReembedActive(entityId)) {
|
|
1081
1281
|
throw new WikiBusyError("reembed", entityId);
|
|
1082
1282
|
}
|
|
1283
|
+
if (this._isImportActiveFor(entityId)) {
|
|
1284
|
+
throw new WikiBusyError("import", entityId);
|
|
1285
|
+
}
|
|
1286
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1287
|
+
throw new WikiBusyError("forget", entityId);
|
|
1288
|
+
}
|
|
1083
1289
|
this.activeMaintenanceJobs.add(jobKey);
|
|
1084
1290
|
try {
|
|
1085
1291
|
await this._doRunHeal(entityId);
|
|
@@ -1087,9 +1293,9 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1087
1293
|
this.activeMaintenanceJobs.delete(jobKey);
|
|
1088
1294
|
}
|
|
1089
1295
|
}
|
|
1090
|
-
async runReembed(entityId) {
|
|
1296
|
+
async runReembed(entityId, opts) {
|
|
1091
1297
|
const embedFn = this.options.llmProvider.embed;
|
|
1092
|
-
if (!embedFn) return { embedded: 0, skipped: 0 };
|
|
1298
|
+
if (!embedFn) return { embedded: 0, skipped: 0, failed: 0 };
|
|
1093
1299
|
const reembedKey = entityId ? this._reembedKey(entityId) : this._globalReembedKey();
|
|
1094
1300
|
if (this.activeMaintenanceJobs.has(reembedKey)) {
|
|
1095
1301
|
throw new WikiBusyError("reembed", entityId ?? "*");
|
|
@@ -1110,6 +1316,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1110
1316
|
if (this._isIngestActiveFor(entityId)) {
|
|
1111
1317
|
throw new WikiBusyError("ingest", entityId);
|
|
1112
1318
|
}
|
|
1319
|
+
if (this._isImportActiveFor(entityId)) {
|
|
1320
|
+
throw new WikiBusyError("import", entityId);
|
|
1321
|
+
}
|
|
1322
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1323
|
+
throw new WikiBusyError("forget", entityId);
|
|
1324
|
+
}
|
|
1113
1325
|
} else {
|
|
1114
1326
|
if (this._isAnyMaintenanceActiveWithSuffix(":reembed")) {
|
|
1115
1327
|
throw new WikiBusyError("reembed", "*");
|
|
@@ -1126,6 +1338,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1126
1338
|
if (this.activeIngestJobs.size > 0) {
|
|
1127
1339
|
throw new WikiBusyError("ingest", "*");
|
|
1128
1340
|
}
|
|
1341
|
+
if (this._isAnyMaintenanceActiveWithSuffix(":import")) {
|
|
1342
|
+
throw new WikiBusyError("import", "*");
|
|
1343
|
+
}
|
|
1344
|
+
if (this._isAnyMaintenanceActiveWithSuffix(":forget")) {
|
|
1345
|
+
throw new WikiBusyError("forget", "*");
|
|
1346
|
+
}
|
|
1129
1347
|
}
|
|
1130
1348
|
this.activeMaintenanceJobs.add(reembedKey);
|
|
1131
1349
|
try {
|
|
@@ -1135,17 +1353,64 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1135
1353
|
`SELECT * FROM ${this.prefix}entries WHERE ${where}`,
|
|
1136
1354
|
params
|
|
1137
1355
|
);
|
|
1356
|
+
if (entityId) {
|
|
1357
|
+
this.vectorCache.delete(entityId);
|
|
1358
|
+
} else {
|
|
1359
|
+
this.vectorCache.clear();
|
|
1360
|
+
}
|
|
1361
|
+
const skipExisting = opts?.skipExisting ?? false;
|
|
1362
|
+
let effectiveSkip = skipExisting;
|
|
1363
|
+
if (skipExisting) {
|
|
1364
|
+
const mismatchRow = await this.db.getFirstAsync(
|
|
1365
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
1366
|
+
);
|
|
1367
|
+
if (mismatchRow) {
|
|
1368
|
+
if (entityId) {
|
|
1369
|
+
const mismatchDim = parseInt(mismatchRow.value, 10);
|
|
1370
|
+
const staleForEntity = await this.db.getFirstAsync(
|
|
1371
|
+
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
1372
|
+
WHERE entity_id = ? AND deleted_at IS NULL
|
|
1373
|
+
AND (
|
|
1374
|
+
embedding_blob IS NULL
|
|
1375
|
+
OR (CAST(length(embedding_blob) AS INTEGER) / 4) != ?
|
|
1376
|
+
)`,
|
|
1377
|
+
[entityId, mismatchDim]
|
|
1378
|
+
);
|
|
1379
|
+
if (staleForEntity && staleForEntity.cnt > 0) effectiveSkip = false;
|
|
1380
|
+
} else {
|
|
1381
|
+
effectiveSkip = false;
|
|
1382
|
+
}
|
|
1383
|
+
}
|
|
1384
|
+
}
|
|
1138
1385
|
let embedded = 0;
|
|
1139
1386
|
let skipped = 0;
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1387
|
+
let failed = 0;
|
|
1388
|
+
try {
|
|
1389
|
+
for (const row of rows) {
|
|
1390
|
+
const existingBlob = row.embedding_blob;
|
|
1391
|
+
const blobIsValid = !!existingBlob && existingBlob.byteLength > 0 && existingBlob.byteLength % 4 === 0;
|
|
1392
|
+
if (effectiveSkip && blobIsValid) {
|
|
1393
|
+
const vec = parseEmbedding(existingBlob, null);
|
|
1394
|
+
if (vec !== null && vec.every((v) => Number.isFinite(v))) {
|
|
1395
|
+
skipped++;
|
|
1396
|
+
continue;
|
|
1397
|
+
}
|
|
1398
|
+
}
|
|
1399
|
+
const success = await this.embedFact(row);
|
|
1400
|
+
if (success) embedded++;
|
|
1401
|
+
else failed++;
|
|
1402
|
+
}
|
|
1403
|
+
if (embedded > 0) {
|
|
1404
|
+
await this._reconcileEmbeddingDimension();
|
|
1405
|
+
}
|
|
1406
|
+
} finally {
|
|
1407
|
+
if (entityId) {
|
|
1408
|
+
this.vectorCache.delete(entityId);
|
|
1409
|
+
} else {
|
|
1410
|
+
this.vectorCache.clear();
|
|
1411
|
+
}
|
|
1147
1412
|
}
|
|
1148
|
-
return { embedded, skipped };
|
|
1413
|
+
return { embedded, skipped, failed };
|
|
1149
1414
|
} finally {
|
|
1150
1415
|
this.activeMaintenanceJobs.delete(reembedKey);
|
|
1151
1416
|
}
|
|
@@ -1165,6 +1430,9 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1165
1430
|
heal: this.activeMaintenanceJobs.has(this._healKey(entityId))
|
|
1166
1431
|
};
|
|
1167
1432
|
}
|
|
1433
|
+
clearVectorCache() {
|
|
1434
|
+
this.vectorCache.clear();
|
|
1435
|
+
}
|
|
1168
1436
|
async _getFullBundle(entityId, opts) {
|
|
1169
1437
|
const maxEvents = opts?.maxEvents;
|
|
1170
1438
|
const eventsQuery = maxEvents != null ? `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at ASC`;
|
|
@@ -1181,10 +1449,16 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1181
1449
|
this.db.getAllAsync(eventsQuery, eventsParams)
|
|
1182
1450
|
]);
|
|
1183
1451
|
const facts = factsRaw.map((f) => {
|
|
1184
|
-
const { embedding: _embedding, ...rest } = f;
|
|
1452
|
+
const { embedding: _embedding, embedding_blob, ...rest } = f;
|
|
1453
|
+
const safeBlobCopy = opts?.includeBlobs && embedding_blob ? (() => {
|
|
1454
|
+
const c = new ArrayBuffer(embedding_blob.byteLength);
|
|
1455
|
+
new Uint8Array(c).set(embedding_blob);
|
|
1456
|
+
return new Uint8Array(c);
|
|
1457
|
+
})() : void 0;
|
|
1458
|
+
const factBase = safeBlobCopy ? { ...rest, embedding_blob: safeBlobCopy } : rest;
|
|
1185
1459
|
return {
|
|
1186
|
-
...
|
|
1187
|
-
tags: typeof
|
|
1460
|
+
...factBase,
|
|
1461
|
+
tags: typeof factBase.tags === "string" ? JSON.parse(factBase.tags) : factBase.tags
|
|
1188
1462
|
};
|
|
1189
1463
|
});
|
|
1190
1464
|
const events = maxEvents != null ? eventsRaw.slice().reverse() : eventsRaw;
|
|
@@ -1211,7 +1485,7 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1211
1485
|
for (let i = 0; i < ids.length; i += BATCH) {
|
|
1212
1486
|
const batch = ids.slice(i, i + BATCH);
|
|
1213
1487
|
const batchResults = await Promise.all(
|
|
1214
|
-
batch.map(async (id) => [id, await this._getFullBundle(id)])
|
|
1488
|
+
batch.map(async (id) => [id, await this._getFullBundle(id, { includeBlobs: true })])
|
|
1215
1489
|
);
|
|
1216
1490
|
for (const [id, bundle] of batchResults) {
|
|
1217
1491
|
entities[id] = bundle;
|
|
@@ -1221,172 +1495,339 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1221
1495
|
}
|
|
1222
1496
|
async importDump(dump, opts) {
|
|
1223
1497
|
const merge = opts?.merge ?? false;
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1498
|
+
const entityIds = Object.keys(dump.entities);
|
|
1499
|
+
for (const entityId of entityIds) {
|
|
1500
|
+
if (this.activeMaintenanceJobs.has(this._importKey(entityId))) {
|
|
1501
|
+
throw new WikiBusyError("import", entityId);
|
|
1502
|
+
}
|
|
1503
|
+
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
1504
|
+
throw new WikiBusyError("librarian", entityId);
|
|
1505
|
+
}
|
|
1506
|
+
if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
1507
|
+
throw new WikiBusyError("heal", entityId);
|
|
1508
|
+
}
|
|
1509
|
+
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
1510
|
+
throw new WikiBusyError("prune", entityId);
|
|
1511
|
+
}
|
|
1512
|
+
if (this._isReembedActive(entityId)) {
|
|
1513
|
+
throw new WikiBusyError("reembed", entityId);
|
|
1514
|
+
}
|
|
1515
|
+
if (this._isIngestActiveFor(entityId)) {
|
|
1516
|
+
throw new WikiBusyError("ingest", entityId);
|
|
1517
|
+
}
|
|
1518
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1519
|
+
throw new WikiBusyError("forget", entityId);
|
|
1520
|
+
}
|
|
1521
|
+
}
|
|
1522
|
+
if (this.activeMaintenanceJobs.has(this._globalImportKey())) {
|
|
1523
|
+
throw new WikiBusyError("import", "*");
|
|
1524
|
+
}
|
|
1525
|
+
this.activeMaintenanceJobs.add(this._globalImportKey());
|
|
1526
|
+
for (const entityId of entityIds) {
|
|
1527
|
+
this.activeMaintenanceJobs.add(this._importKey(entityId));
|
|
1528
|
+
}
|
|
1529
|
+
try {
|
|
1530
|
+
for (const [entityId, bundle] of Object.entries(dump.entities)) {
|
|
1531
|
+
await this._doImportEntity(entityId, bundle, merge);
|
|
1532
|
+
}
|
|
1533
|
+
} finally {
|
|
1534
|
+
this.activeMaintenanceJobs.delete(this._globalImportKey());
|
|
1535
|
+
for (const entityId of entityIds) {
|
|
1536
|
+
this.activeMaintenanceJobs.delete(this._importKey(entityId));
|
|
1537
|
+
}
|
|
1538
|
+
}
|
|
1539
|
+
}
|
|
1540
|
+
async _doImportEntity(entityId, bundle, merge) {
|
|
1541
|
+
const upsertedFactIds = /* @__PURE__ */ new Set();
|
|
1542
|
+
const factsWithPreservedBlob = /* @__PURE__ */ new Set();
|
|
1543
|
+
const preservedBlobDims = /* @__PURE__ */ new Set();
|
|
1544
|
+
await this.db.withTransactionAsync(async () => {
|
|
1545
|
+
if (!merge) {
|
|
1546
|
+
const now = Date.now();
|
|
1547
|
+
await this.db.runAsync(
|
|
1548
|
+
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
1549
|
+
[now, now, entityId]
|
|
1550
|
+
);
|
|
1551
|
+
await this.db.runAsync(
|
|
1552
|
+
`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
1553
|
+
[now, now, entityId]
|
|
1554
|
+
);
|
|
1555
|
+
await this.db.runAsync(
|
|
1556
|
+
`DELETE FROM ${this.prefix}checkpoints WHERE entity_id = ?`,
|
|
1557
|
+
[entityId]
|
|
1558
|
+
);
|
|
1559
|
+
}
|
|
1560
|
+
const factIds = bundle.facts.map((fact) => fact.id);
|
|
1561
|
+
const existingFactsById = /* @__PURE__ */ new Map();
|
|
1562
|
+
const factLookupChunkSize = 500;
|
|
1563
|
+
for (let i = 0; i < factIds.length; i += factLookupChunkSize) {
|
|
1564
|
+
const factIdChunk = factIds.slice(i, i + factLookupChunkSize);
|
|
1565
|
+
if (factIdChunk.length === 0) continue;
|
|
1566
|
+
const placeholders = factIdChunk.map(() => "?").join(", ");
|
|
1567
|
+
const existingFacts = await this.db.getAllAsync(
|
|
1568
|
+
`SELECT id, entity_id, updated_at FROM ${this.prefix}entries WHERE id IN (${placeholders})`,
|
|
1569
|
+
factIdChunk
|
|
1570
|
+
);
|
|
1571
|
+
for (const existingFact of existingFacts) {
|
|
1572
|
+
existingFactsById.set(existingFact.id, existingFact);
|
|
1240
1573
|
}
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
const
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1574
|
+
}
|
|
1575
|
+
for (const fact of bundle.facts) {
|
|
1576
|
+
const tagsJson = JSON.stringify(Array.isArray(fact.tags) ? fact.tags : []);
|
|
1577
|
+
const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
|
|
1578
|
+
const existing = existingFactsById.get(fact.id);
|
|
1579
|
+
const rawBlobRaw = fact.embedding_blob;
|
|
1580
|
+
let rawBlob = null;
|
|
1581
|
+
if (rawBlobRaw instanceof Uint8Array) {
|
|
1582
|
+
rawBlob = rawBlobRaw;
|
|
1583
|
+
} else if (rawBlobRaw !== null && rawBlobRaw !== void 0 && typeof rawBlobRaw === "object") {
|
|
1584
|
+
const obj = rawBlobRaw;
|
|
1585
|
+
if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
|
|
1586
|
+
rawBlob = new Uint8Array(obj["data"]);
|
|
1587
|
+
} else if (!Array.isArray(rawBlobRaw)) {
|
|
1588
|
+
const entries = Object.keys(obj);
|
|
1589
|
+
if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
|
|
1590
|
+
const len = entries.length;
|
|
1591
|
+
rawBlob = new Uint8Array(len);
|
|
1592
|
+
for (let i = 0; i < len; i++) rawBlob[i] = obj[String(i)] ?? 0;
|
|
1593
|
+
}
|
|
1254
1594
|
}
|
|
1255
1595
|
}
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
const
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
if (safeUpdatedAt <= existing.updated_at) continue;
|
|
1596
|
+
let blobData = null;
|
|
1597
|
+
if (rawBlob !== null && rawBlob.byteLength > 0 && rawBlob.byteLength % 4 === 0) {
|
|
1598
|
+
const copy = new ArrayBuffer(rawBlob.byteLength);
|
|
1599
|
+
new Uint8Array(copy).set(rawBlob);
|
|
1600
|
+
const floats = new Float32Array(copy, 0, rawBlob.byteLength / 4);
|
|
1601
|
+
let allFinite = true;
|
|
1602
|
+
for (let i = 0; i < floats.length; i++) {
|
|
1603
|
+
if (!isFinite(floats[i])) {
|
|
1604
|
+
allFinite = false;
|
|
1605
|
+
break;
|
|
1267
1606
|
}
|
|
1607
|
+
}
|
|
1608
|
+
if (allFinite) {
|
|
1609
|
+
blobData = rawBlob;
|
|
1610
|
+
}
|
|
1611
|
+
}
|
|
1612
|
+
if (existing) {
|
|
1613
|
+
if (existing.entity_id !== entityId) {
|
|
1614
|
+
this._warnCrossEntityCollision("entry", fact.id, existing.entity_id, entityId);
|
|
1615
|
+
continue;
|
|
1616
|
+
}
|
|
1617
|
+
if (merge) {
|
|
1618
|
+
if (safeUpdatedAt <= existing.updated_at) continue;
|
|
1619
|
+
}
|
|
1620
|
+
if (blobData != null) {
|
|
1621
|
+
await this.db.runAsync(
|
|
1622
|
+
`UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ?, embedding_blob = ?, embedding = NULL WHERE id = ?`,
|
|
1623
|
+
[entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData, fact.id]
|
|
1624
|
+
);
|
|
1625
|
+
factsWithPreservedBlob.add(fact.id);
|
|
1626
|
+
if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
|
|
1627
|
+
} else {
|
|
1268
1628
|
await this.db.runAsync(
|
|
1269
|
-
`UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at =
|
|
1629
|
+
`UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ?, embedding_blob = NULL, embedding = NULL WHERE id = ?`,
|
|
1270
1630
|
[entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, fact.id]
|
|
1271
1631
|
);
|
|
1272
|
-
|
|
1632
|
+
}
|
|
1633
|
+
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1634
|
+
upsertedFactIds.add(fact.id);
|
|
1635
|
+
} else {
|
|
1636
|
+
if (blobData != null) {
|
|
1637
|
+
await this.db.runAsync(
|
|
1638
|
+
`INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at, embedding_blob) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
1639
|
+
[fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData]
|
|
1640
|
+
);
|
|
1641
|
+
factsWithPreservedBlob.add(fact.id);
|
|
1642
|
+
if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
|
|
1273
1643
|
} else {
|
|
1274
1644
|
await this.db.runAsync(
|
|
1275
1645
|
`INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
1276
1646
|
[fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at]
|
|
1277
1647
|
);
|
|
1278
|
-
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1279
1648
|
}
|
|
1649
|
+
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1650
|
+
upsertedFactIds.add(fact.id);
|
|
1280
1651
|
}
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1652
|
+
}
|
|
1653
|
+
const taskIds = bundle.tasks.map((task) => task.id);
|
|
1654
|
+
const existingTasksById = /* @__PURE__ */ new Map();
|
|
1655
|
+
const taskLookupChunkSize = 500;
|
|
1656
|
+
for (let i = 0; i < taskIds.length; i += taskLookupChunkSize) {
|
|
1657
|
+
const taskIdChunk = taskIds.slice(i, i + taskLookupChunkSize);
|
|
1658
|
+
if (taskIdChunk.length === 0) continue;
|
|
1659
|
+
const placeholders = taskIdChunk.map(() => "?").join(", ");
|
|
1660
|
+
const existingTasks = await this.db.getAllAsync(
|
|
1661
|
+
`SELECT id, entity_id, updated_at FROM ${this.prefix}tasks WHERE id IN (${placeholders})`,
|
|
1662
|
+
taskIdChunk
|
|
1663
|
+
);
|
|
1664
|
+
for (const existingTask of existingTasks) {
|
|
1665
|
+
existingTasksById.set(existingTask.id, existingTask);
|
|
1666
|
+
}
|
|
1667
|
+
}
|
|
1668
|
+
for (const task of bundle.tasks) {
|
|
1669
|
+
const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
|
|
1670
|
+
const existing = existingTasksById.get(task.id);
|
|
1671
|
+
if (existing) {
|
|
1672
|
+
if (existing.entity_id !== entityId) {
|
|
1673
|
+
this._warnCrossEntityCollision("task", task.id, existing.entity_id, entityId);
|
|
1674
|
+
continue;
|
|
1675
|
+
}
|
|
1676
|
+
if (merge) {
|
|
1677
|
+
if (safeUpdatedAt <= existing.updated_at) continue;
|
|
1294
1678
|
}
|
|
1679
|
+
await this.db.runAsync(
|
|
1680
|
+
`UPDATE ${this.prefix}tasks SET entity_id = ?, description = ?, status = ?, priority = ?, created_at = ?, updated_at = ?, resolved_at = ?, deleted_at = ? WHERE id = ?`,
|
|
1681
|
+
[entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at, task.id]
|
|
1682
|
+
);
|
|
1683
|
+
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1684
|
+
} else {
|
|
1685
|
+
await this.db.runAsync(
|
|
1686
|
+
`INSERT INTO ${this.prefix}tasks (id, entity_id, description, status, priority, created_at, updated_at, resolved_at, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
1687
|
+
[task.id, entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at]
|
|
1688
|
+
);
|
|
1689
|
+
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1295
1690
|
}
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1691
|
+
}
|
|
1692
|
+
for (const event of bundle.events) {
|
|
1693
|
+
await this.db.runAsync(
|
|
1694
|
+
`INSERT OR IGNORE INTO ${this.prefix}events (id, entity_id, event_type, summary, related_entry_id, created_at)
|
|
1695
|
+
VALUES (?, ?, ?, ?, ?, ?)`,
|
|
1696
|
+
[event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
|
|
1697
|
+
);
|
|
1698
|
+
}
|
|
1699
|
+
});
|
|
1700
|
+
this.vectorCache.delete(entityId);
|
|
1701
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1702
|
+
for (const fact of bundle.facts) {
|
|
1703
|
+
if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
|
|
1704
|
+
await this.embedFact({
|
|
1705
|
+
id: fact.id,
|
|
1706
|
+
title: fact.title,
|
|
1707
|
+
body: fact.body,
|
|
1708
|
+
tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
|
|
1709
|
+
});
|
|
1710
|
+
}
|
|
1711
|
+
}
|
|
1712
|
+
try {
|
|
1713
|
+
const canonicalRow = await this.db.getFirstAsync(
|
|
1714
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension'`
|
|
1715
|
+
);
|
|
1716
|
+
const canonicalDim = canonicalRow ? parseInt(canonicalRow.value, 10) : null;
|
|
1717
|
+
if (preservedBlobDims.size === 1) {
|
|
1718
|
+
const preservedDim = [...preservedBlobDims][0];
|
|
1719
|
+
if (canonicalDim === null || canonicalDim === preservedDim) {
|
|
1720
|
+
await this.storeEmbeddingDimension(preservedDim);
|
|
1721
|
+
const staleMismatch = await this.db.getFirstAsync(
|
|
1722
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
1723
|
+
);
|
|
1724
|
+
if (staleMismatch && parseInt(staleMismatch.value, 10) !== preservedDim) {
|
|
1313
1725
|
await this.db.runAsync(
|
|
1314
|
-
`INSERT INTO ${this.prefix}
|
|
1315
|
-
[
|
|
1726
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
1727
|
+
[String(preservedDim)]
|
|
1316
1728
|
);
|
|
1317
|
-
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1318
1729
|
}
|
|
1319
|
-
|
|
1320
|
-
|
|
1730
|
+
await this._reconcileEmbeddingDimension();
|
|
1731
|
+
} else {
|
|
1321
1732
|
await this.db.runAsync(
|
|
1322
|
-
`INSERT OR
|
|
1323
|
-
|
|
1324
|
-
[event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
|
|
1733
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
1734
|
+
[String(canonicalDim)]
|
|
1325
1735
|
);
|
|
1326
1736
|
}
|
|
1327
|
-
})
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
await this.
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1737
|
+
} else if (preservedBlobDims.size > 1) {
|
|
1738
|
+
if (canonicalDim === null) {
|
|
1739
|
+
const sortedPreservedBlobDims = [...preservedBlobDims].sort((a, b) => a - b);
|
|
1740
|
+
await this.storeEmbeddingDimension(sortedPreservedBlobDims[0]);
|
|
1741
|
+
await this.db.runAsync(
|
|
1742
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
1743
|
+
[String(sortedPreservedBlobDims[0])]
|
|
1744
|
+
);
|
|
1745
|
+
} else {
|
|
1746
|
+
await this.db.runAsync(
|
|
1747
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
1748
|
+
[String(canonicalDim)]
|
|
1749
|
+
);
|
|
1336
1750
|
}
|
|
1337
1751
|
}
|
|
1752
|
+
} finally {
|
|
1753
|
+
this.vectorCache.delete(entityId);
|
|
1338
1754
|
}
|
|
1339
|
-
await this.rebuildMiniSearchIndex();
|
|
1340
1755
|
}
|
|
1341
1756
|
async forget(entityId, params) {
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
if (
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
} else {
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
const
|
|
1365
|
-
let
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
const
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1757
|
+
let blockingOperation = null;
|
|
1758
|
+
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
1759
|
+
blockingOperation = "librarian";
|
|
1760
|
+
} else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
1761
|
+
blockingOperation = "heal";
|
|
1762
|
+
} else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
1763
|
+
blockingOperation = "prune";
|
|
1764
|
+
} else if (this._isReembedActive(entityId)) {
|
|
1765
|
+
blockingOperation = "reembed";
|
|
1766
|
+
} else if (this._isIngestActiveFor(entityId)) {
|
|
1767
|
+
blockingOperation = "ingest";
|
|
1768
|
+
} else if (this._isImportActiveFor(entityId)) {
|
|
1769
|
+
blockingOperation = "import";
|
|
1770
|
+
} else if (this._isForgetActiveFor(entityId)) {
|
|
1771
|
+
blockingOperation = "forget";
|
|
1772
|
+
}
|
|
1773
|
+
if (blockingOperation !== null) {
|
|
1774
|
+
throw new WikiBusyError(blockingOperation, entityId);
|
|
1775
|
+
}
|
|
1776
|
+
const forgetKey = this._forgetKey(entityId);
|
|
1777
|
+
this.activeMaintenanceJobs.add(forgetKey);
|
|
1778
|
+
try {
|
|
1779
|
+
const now = Date.now();
|
|
1780
|
+
let deletedEntries = 0;
|
|
1781
|
+
let deletedTasks = 0;
|
|
1782
|
+
if (params.clearAll) {
|
|
1783
|
+
const [entriesRes, tasksRes] = await Promise.all([
|
|
1784
|
+
this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId]),
|
|
1785
|
+
this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId])
|
|
1786
|
+
]);
|
|
1787
|
+
await this.db.runAsync(`UPDATE ${this.prefix}checkpoints SET memory_checkpoint = 0, heal_checkpoint = 0 WHERE entity_id = ?`, [entityId]);
|
|
1788
|
+
deletedEntries = entriesRes.changes;
|
|
1789
|
+
deletedTasks = tasksRes.changes;
|
|
1790
|
+
} else {
|
|
1791
|
+
const hasIdSelectors = params.entryId !== void 0 || params.taskId !== void 0;
|
|
1792
|
+
const hasSourceSelectors = params.sourceRef !== void 0 || params.sourceHash !== void 0;
|
|
1793
|
+
if (hasIdSelectors && hasSourceSelectors) {
|
|
1794
|
+
throw new Error("forget() params are mutually exclusive: use entryId/taskId together, or sourceRef/sourceHash together, but not both in the same call");
|
|
1372
1795
|
}
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1796
|
+
const sourceRef = params.sourceRef !== void 0 ? normalizeSourceRef(params.sourceRef) : null;
|
|
1797
|
+
if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
|
|
1798
|
+
const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
|
|
1799
|
+
if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
|
|
1800
|
+
const entryPromise = params.entryId ? this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.entryId, entityId]) : null;
|
|
1801
|
+
const taskPromise = params.taskId ? this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.taskId, entityId]) : null;
|
|
1802
|
+
let refPromise = null;
|
|
1803
|
+
if (sourceRef || sourceHash) {
|
|
1804
|
+
let q = `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`;
|
|
1805
|
+
const args = [now, now, entityId];
|
|
1806
|
+
if (sourceRef) {
|
|
1807
|
+
q += ` AND source_ref = ?`;
|
|
1808
|
+
args.push(sourceRef);
|
|
1809
|
+
}
|
|
1810
|
+
if (sourceHash) {
|
|
1811
|
+
q += ` AND source_hash = ?`;
|
|
1812
|
+
args.push(sourceHash);
|
|
1813
|
+
}
|
|
1814
|
+
refPromise = this.db.runAsync(q, args);
|
|
1376
1815
|
}
|
|
1377
|
-
|
|
1816
|
+
const [entryResult, taskResult, refResult] = await Promise.all([
|
|
1817
|
+
entryPromise ?? Promise.resolve(null),
|
|
1818
|
+
taskPromise ?? Promise.resolve(null),
|
|
1819
|
+
refPromise ?? Promise.resolve(null)
|
|
1820
|
+
]);
|
|
1821
|
+
if (entryResult) deletedEntries += entryResult.changes;
|
|
1822
|
+
if (taskResult) deletedTasks += taskResult.changes;
|
|
1823
|
+
if (refResult) deletedEntries += refResult.changes;
|
|
1378
1824
|
}
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
if (entryResult) deletedEntries += entryResult.changes;
|
|
1385
|
-
if (taskResult) deletedTasks += taskResult.changes;
|
|
1386
|
-
if (refResult) deletedEntries += refResult.changes;
|
|
1825
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1826
|
+
this.vectorCache.delete(entityId);
|
|
1827
|
+
return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
|
|
1828
|
+
} finally {
|
|
1829
|
+
this.activeMaintenanceJobs.delete(forgetKey);
|
|
1387
1830
|
}
|
|
1388
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
1389
|
-
return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
|
|
1390
1831
|
}
|
|
1391
1832
|
async ingestDocument(entityId, params) {
|
|
1392
1833
|
const sourceRef = normalizeSourceRef(params.sourceRef);
|
|
@@ -1414,6 +1855,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1414
1855
|
if (this._isReembedActive(entityId)) {
|
|
1415
1856
|
throw new WikiBusyError("reembed", entityId);
|
|
1416
1857
|
}
|
|
1858
|
+
if (this._isImportActiveFor(entityId)) {
|
|
1859
|
+
throw new WikiBusyError("import", entityId);
|
|
1860
|
+
}
|
|
1861
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1862
|
+
throw new WikiBusyError("forget", entityId);
|
|
1863
|
+
}
|
|
1417
1864
|
this.activeIngestJobs.add(jobKey);
|
|
1418
1865
|
try {
|
|
1419
1866
|
const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
|
|
@@ -1461,16 +1908,31 @@ ${chunk}`;
|
|
|
1461
1908
|
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1462
1909
|
}
|
|
1463
1910
|
});
|
|
1911
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1912
|
+
this.vectorCache.delete(entityId);
|
|
1464
1913
|
for (const fact of insertedFacts) {
|
|
1465
1914
|
await this.embedFact(fact);
|
|
1466
1915
|
}
|
|
1467
|
-
|
|
1916
|
+
this.vectorCache.delete(entityId);
|
|
1468
1917
|
return { truncated, chunks: chunks.length };
|
|
1469
1918
|
} finally {
|
|
1470
1919
|
this.activeIngestJobs.delete(jobKey);
|
|
1471
1920
|
}
|
|
1472
1921
|
}
|
|
1473
1922
|
};
|
|
1923
|
+
/**
|
|
1924
|
+
* Maximum number of entities whose parsed embedding vectors are held in
|
|
1925
|
+
* memory. This cap is intentionally conservative so the cache remains safe
|
|
1926
|
+
* on memory-constrained runtimes (e.g., mobile/Expo).
|
|
1927
|
+
*/
|
|
1928
|
+
_WikiMemory.MAX_VECTOR_CACHE_ENTITIES = 16;
|
|
1929
|
+
/**
|
|
1930
|
+
* Maximum number of fact vectors cached per entity. Keep this high enough to
|
|
1931
|
+
* preserve the parsed-embedding reuse optimization for common mid-sized
|
|
1932
|
+
* entities while still maintaining a bounded memory footprint.
|
|
1933
|
+
*/
|
|
1934
|
+
_WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY = 500;
|
|
1935
|
+
var WikiMemory = _WikiMemory;
|
|
1474
1936
|
|
|
1475
1937
|
// src/utils/formatContext.ts
|
|
1476
1938
|
function validateMaxOption(value, name) {
|
|
@@ -1666,8 +2128,23 @@ function formatMemoryDump(dump) {
|
|
|
1666
2128
|
name: formatEntityFileName(entityId),
|
|
1667
2129
|
content: renderEntity(entityId, bundle, dump.generatedAt)
|
|
1668
2130
|
}));
|
|
2131
|
+
const manifestDump = {
|
|
2132
|
+
generatedAt: dump.generatedAt,
|
|
2133
|
+
entities: Object.fromEntries(
|
|
2134
|
+
Object.entries(dump.entities).map(([entityId, bundle]) => [
|
|
2135
|
+
entityId,
|
|
2136
|
+
{
|
|
2137
|
+
...bundle,
|
|
2138
|
+
facts: bundle.facts.map((f) => {
|
|
2139
|
+
const { embedding_blob: _blob, ...rest } = f;
|
|
2140
|
+
return rest;
|
|
2141
|
+
})
|
|
2142
|
+
}
|
|
2143
|
+
])
|
|
2144
|
+
)
|
|
2145
|
+
};
|
|
1669
2146
|
return {
|
|
1670
|
-
manifest: JSON.stringify(
|
|
2147
|
+
manifest: JSON.stringify(manifestDump, null, 2),
|
|
1671
2148
|
files
|
|
1672
2149
|
};
|
|
1673
2150
|
}
|