@ouro.bot/cli 0.1.0-alpha.54 → 0.1.0-alpha.55
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/changelog.json +8 -0
- package/dist/mind/memory.js +17 -23
- package/package.json +1 -1
package/changelog.json
CHANGED
|
@@ -1,6 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
|
|
3
3
|
"versions": [
|
|
4
|
+
{
|
|
5
|
+
"version": "0.1.0-alpha.55",
|
|
6
|
+
"changes": [
|
|
7
|
+
"Memory fact dedup now catches paraphrased duplicates via cosine similarity on existing embeddings, so semantically equivalent facts no longer slip past the word-overlap check.",
|
|
8
|
+
"Semantic dedup gracefully handles corrupt JSONL entries with missing or undefined embeddings instead of crashing on bad data.",
|
|
9
|
+
"Cosine similarity is now imported from associative-recall instead of duplicated in the memory module."
|
|
10
|
+
]
|
|
11
|
+
},
|
|
4
12
|
{
|
|
5
13
|
"version": "0.1.0-alpha.54",
|
|
6
14
|
"changes": [
|
package/dist/mind/memory.js
CHANGED
|
@@ -33,7 +33,6 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
33
33
|
};
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
-
exports.__memoryTestUtils = void 0;
|
|
37
36
|
exports.ensureMemoryStorePaths = ensureMemoryStorePaths;
|
|
38
37
|
exports.appendFactsWithDedup = appendFactsWithDedup;
|
|
39
38
|
exports.readMemoryFacts = readMemoryFacts;
|
|
@@ -46,7 +45,9 @@ const crypto_1 = require("crypto");
|
|
|
46
45
|
const config_1 = require("../heart/config");
|
|
47
46
|
const identity_1 = require("../heart/identity");
|
|
48
47
|
const runtime_1 = require("../nerves/runtime");
|
|
48
|
+
const associative_recall_1 = require("./associative-recall");
|
|
49
49
|
const DEDUP_THRESHOLD = 0.6;
|
|
50
|
+
const SEMANTIC_DEDUP_THRESHOLD = 0.95;
|
|
50
51
|
const ENTITY_TOKEN = /[a-z0-9]+/g;
|
|
51
52
|
const DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small";
|
|
52
53
|
class OpenAIEmbeddingProvider {
|
|
@@ -177,13 +178,24 @@ function appendDailyFact(dailyDir, fact) {
|
|
|
177
178
|
const dayPath = path.join(dailyDir, `${day}.jsonl`);
|
|
178
179
|
fs.appendFileSync(dayPath, `${JSON.stringify(fact)}\n`, "utf8");
|
|
179
180
|
}
|
|
180
|
-
function appendFactsWithDedup(stores, incoming) {
|
|
181
|
+
function appendFactsWithDedup(stores, incoming, options) {
|
|
181
182
|
const existing = readExistingFacts(stores.factsPath);
|
|
182
183
|
const all = [...existing];
|
|
183
184
|
let added = 0;
|
|
184
185
|
let skipped = 0;
|
|
186
|
+
const semanticThreshold = options?.semanticThreshold;
|
|
185
187
|
for (const fact of incoming) {
|
|
186
|
-
const duplicate = all.some((prior) =>
|
|
188
|
+
const duplicate = all.some((prior) => {
|
|
189
|
+
if (overlapScore(prior.text, fact.text) > DEDUP_THRESHOLD)
|
|
190
|
+
return true;
|
|
191
|
+
if (semanticThreshold !== undefined &&
|
|
192
|
+
Array.isArray(fact.embedding) && fact.embedding.length > 0 &&
|
|
193
|
+
Array.isArray(prior.embedding) && prior.embedding.length > 0 &&
|
|
194
|
+
fact.embedding.length === prior.embedding.length) {
|
|
195
|
+
return (0, associative_recall_1.cosineSimilarity)(fact.embedding, prior.embedding) > semanticThreshold;
|
|
196
|
+
}
|
|
197
|
+
return false;
|
|
198
|
+
});
|
|
187
199
|
if (duplicate) {
|
|
188
200
|
skipped++;
|
|
189
201
|
continue;
|
|
@@ -202,24 +214,6 @@ function appendFactsWithDedup(stores, incoming) {
|
|
|
202
214
|
});
|
|
203
215
|
return { added, skipped };
|
|
204
216
|
}
|
|
205
|
-
function cosineSimilarity(left, right) {
|
|
206
|
-
if (left.length === 0 || right.length === 0 || left.length !== right.length)
|
|
207
|
-
return 0;
|
|
208
|
-
let dot = 0;
|
|
209
|
-
let leftNorm = 0;
|
|
210
|
-
let rightNorm = 0;
|
|
211
|
-
for (let i = 0; i < left.length; i += 1) {
|
|
212
|
-
dot += left[i] * right[i];
|
|
213
|
-
leftNorm += left[i] * left[i];
|
|
214
|
-
rightNorm += right[i] * right[i];
|
|
215
|
-
}
|
|
216
|
-
if (leftNorm === 0 || rightNorm === 0)
|
|
217
|
-
return 0;
|
|
218
|
-
return dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm));
|
|
219
|
-
}
|
|
220
|
-
exports.__memoryTestUtils = {
|
|
221
|
-
cosineSimilarity,
|
|
222
|
-
};
|
|
223
217
|
function createDefaultEmbeddingProvider() {
|
|
224
218
|
const apiKey = (0, config_1.getOpenAIEmbeddingsApiKey)().trim();
|
|
225
219
|
if (!apiKey)
|
|
@@ -271,7 +265,7 @@ async function saveMemoryFact(options) {
|
|
|
271
265
|
createdAt: (options.now ?? (() => new Date()))().toISOString(),
|
|
272
266
|
embedding,
|
|
273
267
|
};
|
|
274
|
-
return appendFactsWithDedup(stores, [fact]);
|
|
268
|
+
return appendFactsWithDedup(stores, [fact], { semanticThreshold: SEMANTIC_DEDUP_THRESHOLD });
|
|
275
269
|
}
|
|
276
270
|
async function backfillEmbeddings(options) {
|
|
277
271
|
const memoryRoot = options?.memoryRoot ?? path.join((0, identity_1.getAgentRoot)(), "psyche", "memory");
|
|
@@ -372,7 +366,7 @@ async function searchMemoryFacts(query, facts, embeddingProvider) {
|
|
|
372
366
|
.filter((fact) => fact.embedding.length === queryEmbedding.length)
|
|
373
367
|
.map((fact) => ({
|
|
374
368
|
fact,
|
|
375
|
-
score: cosineSimilarity(queryEmbedding, fact.embedding),
|
|
369
|
+
score: (0, associative_recall_1.cosineSimilarity)(queryEmbedding, fact.embedding),
|
|
376
370
|
}))
|
|
377
371
|
.filter((entry) => entry.score > 0)
|
|
378
372
|
.sort((left, right) => right.score - left.score)
|