@plur-ai/core 0.3.1 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-KMVQYBNP.js +397 -0
- package/dist/embeddings-2IODIQAF.js +11 -0
- package/dist/index.d.ts +524 -2
- package/dist/index.js +2189 -529
- package/package.json +4 -2
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
// src/fts.ts
|
|
2
|
+
/**
 * Common English function words that are removed from tokenized text.
 * Only words longer than two characters need to be listed, because
 * ftsTokenize already drops shorter tokens.
 */
var STOP_WORDS = /* @__PURE__ */ new Set([
  "the",
  "and",
  "for",
  "that",
  "this",
  "with",
  "from",
  "are",
  "was",
  "were",
  "been",
  "have",
  "has",
  "not",
  "but",
  "its",
  "you",
  "your",
  "can",
  "will",
  "should",
  "would",
  "could",
  "may",
  "might"
]);

/**
 * Split free text into lowercase search tokens.
 *
 * Punctuation is replaced by spaces, the text is split on whitespace, and
 * tokens of one or two characters as well as STOP_WORDS entries are dropped.
 *
 * @param {string} text - Text to tokenize.
 * @returns {string[]} Tokens in their original order (duplicates kept).
 */
function ftsTokenize(text) {
  return text
    .toLowerCase()
    // Keep letters, combining marks and digits from every script (plus "_").
    // The ASCII-only \w class used to turn accented and non-Latin letters
    // into separators, which shredded words such as "café" into "caf".
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
    .split(/\s+/)
    .filter((word) => word.length > 2 && !STOP_WORDS.has(word));
}
|
|
32
|
+
/**
 * Build the single text string an engram is indexed and searched by.
 *
 * The parts are joined with single spaces in this order: statement, domain
 * (dots replaced by spaces), tags, each entity's name (and its type unless
 * the type is "other"), temporal valid_from / valid_until, and rationale.
 * Optional fields that are missing are skipped.
 *
 * @param {object} engram - Engram record; only `statement` is read unconditionally.
 * @returns {string} Space-joined search text.
 */
function engramSearchText(engram) {
  const parts = [engram.statement];
  if (engram.domain) parts.push(engram.domain.replace(/\./g, " "));
  // `tags` is guarded like every other optional field so that a record
  // without tags no longer throws.
  if (engram.tags?.length > 0) parts.push(engram.tags.join(" "));
  if (engram.entities) {
    for (const entity of engram.entities) {
      parts.push(entity.name);
      if (entity.type !== "other") parts.push(entity.type);
    }
  }
  if (engram.temporal) {
    const { valid_from: validFrom, valid_until: validUntil } = engram.temporal;
    if (validFrom) parts.push(validFrom);
    if (validUntil) parts.push(validUntil);
  }
  if (engram.rationale) parts.push(engram.rationale);
  return parts.join(" ");
}
|
|
49
|
+
/**
 * Compute an inverse-document-frequency weight for every query token.
 *
 * A document counts towards a token's frequency when any of its distinct
 * terms contains the token or is contained in it. The weight is
 * max(0, ln(N / (1 + df))).
 *
 * @param {object[]} engrams - Corpus of engrams.
 * @param {string[]} queryTokens - Tokenized query.
 * @returns {Map<string, number>} Weight per query token (empty for an empty corpus).
 */
function computeIdf(engrams, queryTokens) {
  const weights = /* @__PURE__ */ new Map();
  const total = engrams.length;
  if (total === 0) return weights;

  const vocabularies = engrams.map(
    (engram) => new Set(ftsTokenize(engramSearchText(engram)))
  );

  // Substring match in either direction; an exact match is a special case.
  const overlaps = (vocabulary, token) => {
    for (const term of vocabulary) {
      if (term.includes(token) || token.includes(term)) return true;
    }
    return false;
  };

  for (const token of queryTokens) {
    let docFrequency = 0;
    for (const vocabulary of vocabularies) {
      if (overlaps(vocabulary, token)) docFrequency += 1;
    }
    weights.set(token, Math.max(0, Math.log(total / (1 + docFrequency))));
  }
  return weights;
}
|
|
65
|
+
// BM25 tuning constants used by ftsScore: K1 appears in the term-frequency
// saturation term, B scales the document-length normalization.
var BM25_K1 = 1.2;
var BM25_B = 0.75;

/**
 * Score one engram against a tokenized query with a BM25-style formula.
 *
 * Term frequency counts every document term that contains the query token
 * or is contained in it. When no IDF map is given, or every weight in it is
 * zero, each token is weighted 1; otherwise tokens whose weight is zero or
 * missing are skipped.
 *
 * @param {object} engram - Engram to score.
 * @param {string[]} queryTokens - Tokenized query.
 * @param {Map<string, number>} [idfWeights] - Optional per-token IDF weights.
 * @param {number} [avgDocLength] - Average document length; falls back to this document's length.
 * @returns {number} Relevance score (0 when nothing matches).
 */
function ftsScore(engram, queryTokens, idfWeights, avgDocLength) {
  const docTerms = ftsTokenize(engramSearchText(engram));
  if (queryTokens.length === 0) return 0;

  const docLen = docTerms.length;
  const avgdl = avgDocLength && avgDocLength > 0 ? avgDocLength : docLen;
  const useUniformIdf =
    !idfWeights || !Array.from(idfWeights.values()).some((weight) => weight > 0);

  let total = 0;
  for (const token of queryTokens) {
    const idf = useUniformIdf ? 1 : idfWeights.get(token) ?? 0;
    if (idf === 0) continue;

    let tf = 0;
    for (const term of docTerms) {
      if (term.includes(token) || token.includes(term)) tf += 1;
    }
    if (tf === 0) continue;

    const numerator = tf * (BM25_K1 + 1);
    const denominator = tf + BM25_K1 * (1 - BM25_B + BM25_B * docLen / avgdl);
    total += idf * (numerator / denominator);
  }
  return total;
}
|
|
95
|
+
/**
 * Rank engrams against a free-text query and return the best matches.
 *
 * @param {object[]} engrams - Corpus to search.
 * @param {string} query - Free-text query.
 * @param {number} [limit=20] - Maximum number of results.
 * @returns {object[]} Engrams with a positive score, best first.
 */
function searchEngrams(engrams, query, limit = 20) {
  const queryTokens = ftsTokenize(query);
  if (queryTokens.length === 0) return [];

  const idfWeights = computeIdf(engrams, queryTokens);

  let totalLength = 0;
  for (const engram of engrams) {
    totalLength += ftsTokenize(engramSearchText(engram)).length;
  }
  const avgDocLength = engrams.length > 0 ? totalLength / engrams.length : 0;

  const hits = [];
  for (const engram of engrams) {
    const score = ftsScore(engram, queryTokens, idfWeights, avgDocLength);
    if (score > 0) hits.push({ engram, score });
  }
  hits.sort((left, right) => right.score - left.score);
  return hits.slice(0, limit).map((hit) => hit.engram);
}
|
|
102
|
+
|
|
103
|
+
// src/embeddings.ts
|
|
104
|
+
import { existsSync as existsSync2, readFileSync, mkdirSync as mkdirSync2 } from "fs";
|
|
105
|
+
import { join as join2 } from "path";
|
|
106
|
+
import { createHash } from "crypto";
|
|
107
|
+
|
|
108
|
+
// src/sync.ts
|
|
109
|
+
import { execFileSync } from "child_process";
|
|
110
|
+
import { existsSync, writeFileSync, renameSync, mkdirSync, unlinkSync, statSync } from "fs";
|
|
111
|
+
import { join, dirname } from "path";
|
|
112
|
+
// Contents of the .gitignore written by initRepo: one pattern per line,
// terminated by a trailing newline.
var GITIGNORE = [
  "# PLUR \u2014 derived/cache files (regenerated automatically)",
  "embeddings/",
  ".embeddings-cache.json",
  "*.db",
  "*.sqlite",
  "exchange/",
  ""
].join("\n");
|
|
119
|
+
/**
 * Run `git` synchronously and return its trimmed stdout.
 *
 * @param {string[]} args - Arguments passed to git (no shell is involved).
 * @param {string} cwd - Working directory for the command.
 * @returns {string} Trimmed UTF-8 output.
 * @throws Whatever execFileSync throws (non-zero exit, missing binary, 30 s timeout).
 */
function git(args, cwd) {
  const options = { cwd, encoding: "utf8", timeout: 3e4 };
  const stdout = execFileSync("git", args, options);
  return stdout.trim();
}
|
|
122
|
+
/**
 * Best-effort variant of git(): any failure is reported as null instead of
 * an exception.
 *
 * @param {string[]} args - Arguments passed to git.
 * @param {string} cwd - Working directory for the command.
 * @returns {string|null} Trimmed output, or null when the command failed.
 */
function gitSafe(args, cwd) {
  let output = null;
  try {
    output = git(args, cwd);
  } catch {
    // Deliberately swallowed: callers treat null as "command failed".
  }
  return output;
}
|
|
129
|
+
/**
 * Report whether `root` contains a `.git` entry.
 *
 * @param {string} root - Directory to inspect.
 * @returns {boolean} True when `<root>/.git` exists.
 */
function isGitRepo(root) {
  const gitEntry = join(root, ".git");
  return existsSync(gitEntry);
}
|
|
132
|
+
/**
 * Probe for a usable git executable by running `git --version`
 * (5 s timeout).
 *
 * @returns {boolean} True when the command ran successfully.
 */
function hasGitCli() {
  let available = true;
  try {
    execFileSync("git", ["--version"], { encoding: "utf8", timeout: 5e3 });
  } catch {
    available = false;
  }
  return available;
}
|
|
140
|
+
/**
 * Look up the URL of the `origin` remote.
 *
 * @param {string} root - Repository directory.
 * @returns {string|null} Remote URL, or null when the git command fails.
 */
function getRemote(root) {
  const originUrlArgs = ["remote", "get-url", "origin"];
  return gitSafe(originUrlArgs, root);
}
|
|
143
|
+
/**
 * Report whether `git status --porcelain` prints anything.
 *
 * @param {string} root - Repository directory.
 * @returns {boolean} True when there is status output; false when the tree
 *   is clean or the status command itself failed.
 */
function isDirty(root) {
  const porcelain = gitSafe(["status", "--porcelain"], root);
  if (porcelain === null) return false;
  return porcelain.length > 0;
}
|
|
147
|
+
/**
 * Count commits that exist on only one side of HEAD and its upstream.
 *
 * @param {string} root - Repository directory.
 * @param {string} direction - "ahead" counts commits only on HEAD; any other
 *   value counts commits only on the upstream.
 * @returns {number} Commit count, or 0 when there is no upstream or the
 *   count could not be obtained.
 */
function countDiff(root, direction) {
  const upstream = gitSafe(["rev-parse", "--abbrev-ref", "@{u}"], root);
  if (!upstream) return 0;

  const side = direction === "ahead" ? "--left-only" : "--right-only";
  const rawCount = gitSafe(["rev-list", side, "--count", "HEAD...@{u}"], root);
  if (!rawCount) return 0;
  return parseInt(rawCount, 10);
}
|
|
154
|
+
/**
 * Describe the sync state of the store at `root`.
 *
 * When an `origin` remote exists it is fetched first (failures ignored), so
 * the ahead/behind counts are taken after the fetch attempt.
 *
 * @param {string} root - Store directory.
 * @returns {{initialized: boolean, remote: (string|null), dirty: boolean,
 *   branch: (string|null), ahead: number, behind: number}} Status snapshot.
 */
function getSyncStatus(root) {
  const status = {
    initialized: false,
    remote: null,
    dirty: false,
    branch: null,
    ahead: 0,
    behind: 0
  };
  if (!isGitRepo(root)) return status;

  status.initialized = true;
  status.branch = gitSafe(["rev-parse", "--abbrev-ref", "HEAD"], root);
  status.remote = getRemote(root);
  if (status.remote) gitSafe(["fetch", "origin", "--quiet"], root);
  status.dirty = isDirty(root);
  status.ahead = countDiff(root, "ahead");
  status.behind = countDiff(root, "behind");
  return status;
}
|
|
170
|
+
/**
 * Turn `root` into a git repository: run `git init`, write the .gitignore,
 * stage everything and create the initial commit.
 *
 * @param {string} root - Store directory.
 * @throws When any git command fails.
 */
function initRepo(root) {
  git(["init"], root);
  atomicWrite(join(root, ".gitignore"), GITIGNORE);

  const steps = [
    ["add", "-A"],
    ["commit", "-m", "Initial PLUR engram store"]
  ];
  for (const args of steps) {
    git(args, root);
  }
}
|
|
176
|
+
/**
 * Stage and commit every pending change in the store.
 *
 * @param {string} root - Repository directory.
 * @returns {number} 0 when the tree was clean; otherwise the number of files
 *   in the commit (1 when the count could not be parsed).
 * @throws When `git add` or `git commit` fails.
 */
function commitChanges(root) {
  if (!isDirty(root)) return 0;
  git(["add", "-A"], root);

  // Ask only for the summary line. The per-file listing printed by --stat
  // contains file paths, and a path such as "2024 file.yaml" could match
  // the count pattern before the real summary did.
  const summary = gitSafe(["diff", "--cached", "--shortstat"], root);

  const timestamp = (/* @__PURE__ */ new Date()).toISOString().slice(0, 19).replace("T", " ");
  git(["commit", "-m", `plur sync ${timestamp}`], root);

  const match = summary?.match(/(\d+) files? changed/);
  return match ? parseInt(match[1], 10) : 1;
}
|
|
185
|
+
/**
 * Check whether `git grep -l "<<<<<<<"` lists any file.
 *
 * @param {string} root - Repository directory.
 * @returns {boolean} True when at least one file is listed; false when none
 *   is, or when the grep command fails.
 */
function hasConflictMarkers(root) {
  const matchingFiles = gitSafe(["grep", "-l", "<<<<<<<"], root);
  return matchingFiles === null ? false : matchingFiles.length > 0;
}
|
|
189
|
+
/**
 * Bring remote commits into the local branch.
 *
 * Tries `pull --rebase` first; if that fails the rebase is aborted and a
 * regular merge pull is attempted. If the merge also fails and conflict
 * markers are found, the merge is aborted and an error is thrown. Otherwise
 * the working tree is staged and committed as-is.
 *
 * @param {string} root - Repository directory.
 * @returns {boolean} Always true when the function returns.
 * @throws {Error} When the merge left conflict markers behind.
 */
function pullRebase(root) {
  // Pull the branch that is actually checked out. The name used to be
  // hard-coded to "main", so a repository on another branch (for example
  // "master") could never pull. "main" remains the fallback when the name
  // cannot be resolved or HEAD is detached.
  const current = gitSafe(["rev-parse", "--abbrev-ref", "HEAD"], root);
  const branch = current && current !== "HEAD" ? current : "main";

  if (gitSafe(["pull", "--rebase", "origin", branch], root) !== null) return true;

  gitSafe(["rebase", "--abort"], root);
  if (gitSafe(["pull", "origin", branch, "--no-edit"], root) !== null) return true;

  if (hasConflictMarkers(root)) {
    gitSafe(["merge", "--abort"], root);
    throw new Error("Sync conflict: YAML files have merge conflicts that require manual resolution. Your local changes are preserved.");
  }

  git(["add", "-A"], root);
  gitSafe(["commit", "-m", "plur sync: merge conflict resolved (kept both)"], root);
  return true;
}
|
|
203
|
+
/**
 * Synchronize the store at `root` with git.
 *
 * - No repository yet: initialize one, and push it when `remote` is given.
 * - Repository without an `origin`: add `remote` and push, or commit locally
 *   when no remote was given.
 * - Repository with an `origin`: commit, fetch, pull when behind, push when
 *   ahead.
 *
 * @param {string} root - Store directory.
 * @param {string} [remote] - URL to register as `origin` when none exists yet.
 * @returns {{action: string, message: string, remote: (string|null),
 *   files_changed: number}} Summary of what was done.
 * @throws {Error} When git is not installed, a required git command fails,
 *   or a pull leaves merge conflicts.
 */
function sync(root, remote) {
  if (!hasGitCli()) {
    throw new Error("git is not installed. Install git to enable sync.");
  }

  if (!isGitRepo(root)) {
    initRepo(root);
    if (remote) {
      git(["remote", "add", "origin", remote], root);
      const branch = git(["rev-parse", "--abbrev-ref", "HEAD"], root);
      git(["push", "-u", "origin", branch], root);
      return { action: "initialized", message: `Initialized and pushed to ${remote}`, remote, files_changed: 0 };
    }
    return {
      action: "initialized",
      message: "Initialized local git repo. Call plur.sync with remote to enable cross-device sync.",
      remote: null,
      files_changed: 0
    };
  }

  const existingRemote = getRemote(root);

  if (remote && !existingRemote) {
    git(["remote", "add", "origin", remote], root);
    const committed = commitChanges(root);
    const branch = git(["rev-parse", "--abbrev-ref", "HEAD"], root);
    git(["push", "-u", "origin", branch], root);
    return { action: "synced", message: `Remote added and pushed to ${remote}`, remote, files_changed: committed };
  }

  if (!existingRemote) {
    const committed = commitChanges(root);
    if (committed === 0) {
      return { action: "up-to-date", message: 'No changes to commit. Add a remote with plur.sync({ remote: "..." }) to enable cross-device sync.', remote: null, files_changed: 0 };
    }
    return { action: "committed", message: `Committed ${committed} file(s) locally.`, remote: null, files_changed: committed };
  }

  const filesChanged = commitChanges(root);
  gitSafe(["fetch", "origin", "--quiet"], root);
  const behind = countDiff(root, "behind");
  const aheadBefore = countDiff(root, "ahead");
  if (behind > 0) {
    pullRebase(root);
  }

  // Record whether a push ran and whether it succeeded. The message used to
  // say "pushed" only when there was nothing left to push, so real pushes
  // were never reported and failed pushes were invisible.
  const pushAttempted = countDiff(root, "ahead") > 0;
  const pushed = pushAttempted && gitSafe(["push", "origin"], root) !== null;

  if (filesChanged === 0 && behind === 0 && aheadBefore === 0) {
    return { action: "up-to-date", message: "Already in sync.", remote: existingRemote, files_changed: 0 };
  }

  const parts = [];
  if (filesChanged > 0) parts.push(`${filesChanged} file(s) committed`);
  if (behind > 0) parts.push(`pulled ${behind} remote commit(s)`);
  if (pushed) {
    parts.push("pushed");
  } else if (pushAttempted) {
    parts.push("push failed");
  }

  return {
    action: "synced",
    message: parts.length > 0 ? `Synced. ${parts.join(", ")}.` : "Synced.",
    remote: existingRemote,
    files_changed: filesChanged
  };
}
|
|
262
|
+
/**
 * Run `fn` while holding an exclusive lock file (`<filePath>.lock`).
 *
 * The lock is taken by creating the file with the "wx" flag. A lock whose
 * mtime is older than `staleThreshold` is removed and re-acquired. Between
 * attempts the call blocks for `baseDelay * 2^attempt` milliseconds.
 *
 * `fn` is invoked synchronously and the lock file is removed as soon as it
 * returns or throws.
 *
 * @param {string} filePath - File the lock protects.
 * @param {Function} fn - Callback to run under the lock.
 * @param {{maxRetries?: number, baseDelay?: number, staleThreshold?: number}} [options]
 *   Defaults: 5 retries, 100 ms base delay, 10000 ms stale threshold.
 * @returns {*} Whatever `fn` returns.
 * @throws {Error} When the lock cannot be acquired, or a non-EEXIST file
 *   system error occurs while creating it.
 */
function withLock(filePath, fn, options) {
  const lockPath = filePath + ".lock";
  const maxRetries = options?.maxRetries ?? 5;
  const baseDelay = options?.baseDelay ?? 100;
  const staleThreshold = options?.staleThreshold ?? 1e4;

  // Create the lock file exclusively; false means somebody else holds it.
  const tryAcquire = () => {
    try {
      writeFileSync(lockPath, `${process.pid}`, { flag: "wx" });
      return true;
    } catch (err) {
      if (err.code !== "EEXIST") throw err;
      return false;
    }
  };

  // Remove a stale lock. Returns true when an immediate retry makes sense
  // (lock removed, vanished, or could not be inspected).
  const clearStaleLock = () => {
    try {
      const isStale = Date.now() - statSync(lockPath).mtimeMs > staleThreshold;
      if (!isStale) return false;
      unlinkSync(lockPath);
    } catch {
      // The lock disappeared or could not be touched; retrying decides.
    }
    return true;
  };

  // Block the thread without spinning the CPU.
  const sleepSync = (ms) => {
    if (ms > 0 && Number.isFinite(ms)) {
      Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
    }
  };

  // Track acquisition explicitly. Previously, clearing a stale lock on the
  // last attempt left the loop without holding the lock, and `fn` then ran
  // unprotected.
  let acquired = false;
  for (let attempt = 0; attempt <= maxRetries && !acquired; attempt++) {
    acquired = tryAcquire() || (clearStaleLock() && tryAcquire());
    if (!acquired && attempt < maxRetries) {
      sleepSync(baseDelay * Math.pow(2, attempt));
    }
  }
  if (!acquired) {
    throw new Error(`Failed to acquire lock on ${filePath} after ${maxRetries} retries`);
  }

  try {
    return fn();
  } finally {
    try {
      unlinkSync(lockPath);
    } catch {
      // Best effort: the lock file may already be gone.
    }
  }
}
|
|
300
|
+
/**
 * Write `content` to `filePath` through a temporary sibling file
 * (`<filePath>.tmp`) that is then renamed over the target. The parent
 * directory is created first when it does not exist.
 *
 * @param {string} filePath - Destination path.
 * @param {string|Uint8Array} content - Data handed to writeFileSync.
 */
function atomicWrite(filePath, content) {
  const parentDir = dirname(filePath);
  const tempPath = `${filePath}.tmp`;
  if (!existsSync(parentDir)) {
    mkdirSync(parentDir, { recursive: true });
  }
  writeFileSync(tempPath, content);
  renameSync(tempPath, filePath);
}
|
|
307
|
+
|
|
308
|
+
// src/embeddings.ts
|
|
309
|
+
// Cached feature-extraction pipeline, created on first use.
var embedPipeline = null;
// Set once loading has failed so later calls return null without retrying.
var transformersUnavailable = false;

/**
 * Return the shared embedding pipeline, loading it on first use.
 *
 * @returns {Promise<Function|null>} The pipeline, or null when the
 *   transformers module or the model could not be loaded (now or earlier).
 */
async function getEmbedder() {
  if (transformersUnavailable) return null;
  if (embedPipeline) return embedPipeline;

  try {
    const { pipeline: createPipeline } = await import("./transformers.node-PH5YK5EA.js");
    const pipelineOptions = { dtype: "fp32" };
    embedPipeline = await createPipeline("feature-extraction", "Xenova/bge-small-en-v1.5", pipelineOptions);
  } catch {
    // Any failure disables embeddings for the rest of the process.
    transformersUnavailable = true;
    return null;
  }
  return embedPipeline;
}
|
|
326
|
+
/**
 * Embed a piece of text with the shared pipeline, requesting CLS pooling
 * and normalization.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<Float32Array|null>} The embedding, or null when no
 *   embedder is available.
 */
async function embed(text) {
  const embedder = await getEmbedder();
  if (embedder) {
    const output = await embedder(text, { pooling: "cls", normalize: true });
    return new Float32Array(output.data);
  }
  return null;
}
|
|
332
|
+
/**
 * Dot product of two vectors, summed over the indices of `a`.
 *
 * The value is the cosine similarity only when both inputs are already
 * unit-length; no normalization is performed here.
 *
 * @param {ArrayLike<number>} a - First vector; its length drives the loop.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Sum of the element-wise products.
 */
function cosineSimilarity(a, b) {
  let sum = 0;
  let index = 0;
  while (index < a.length) {
    sum += a[index] * b[index];
    index += 1;
  }
  return sum;
}
|
|
337
|
+
/**
 * Load the embeddings cache from disk.
 *
 * @param {string} cachePath - Path of the JSON cache file.
 * @returns {object} The parsed cache object, or an empty object when the
 *   file is missing, unreadable, not valid JSON, or does not hold a JSON
 *   object.
 */
function loadCache(cachePath) {
  try {
    const parsed = JSON.parse(readFileSync(cachePath, "utf8"));
    // Valid JSON is not necessarily an object: "null", arrays or numbers
    // would make later `cache[id]` lookups throw or misbehave.
    const isRecord = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    return isRecord ? parsed : {};
  } catch {
    // A missing or corrupt cache is simply rebuilt from scratch.
    return {};
  }
}
|
|
345
|
+
/**
 * Persist the embeddings cache as JSON.
 *
 * @param {string} cachePath - Destination file.
 * @param {object} cache - Cache object to serialize.
 */
function saveCache(cachePath, cache) {
  // atomicWrite creates the parent directory itself, based on dirname().
  // The directory used to be derived here with lastIndexOf("/"), which
  // produced "" for a bare file name (so mkdirSync threw) and ignored
  // Windows path separators.
  atomicWrite(cachePath, JSON.stringify(cache));
}
|
|
350
|
+
/**
 * Short content fingerprint: the first 16 hex characters of the SHA-256
 * digest of `statement`.
 *
 * @param {string} statement - Text to fingerprint.
 * @returns {string} 16-character lowercase hex string.
 */
function hashStatement(statement) {
  const hasher = createHash("sha256");
  hasher.update(statement);
  const hexDigest = hasher.digest("hex");
  return hexDigest.slice(0, 16);
}
|
|
353
|
+
/**
 * Rank engrams by embedding similarity to a query.
 *
 * Engram embeddings are cached in `.embeddings-cache.json` (inside
 * `storagePath` when given), keyed by engram id and validated by a hash of
 * the engram's search text.
 *
 * @param {object[]} engrams - Engrams to rank.
 * @param {string} query - Free-text query.
 * @param {number} limit - Maximum number of results.
 * @param {string} [storagePath] - Directory holding the cache file.
 * @returns {Promise<object[]>} Engrams ordered by descending similarity;
 *   empty when there are no engrams or no embedder is available.
 */
async function embeddingSearch(engrams, query, limit, storagePath) {
  if (engrams.length === 0) return [];

  const cachePath = storagePath ? join2(storagePath, ".embeddings-cache.json") : ".embeddings-cache.json";
  const cache = loadCache(cachePath);

  const queryEmbedding = await embed(query);
  if (!queryEmbedding) {
    return [];
  }

  const similarities = [];
  let cacheChanged = false;
  for (const engram of engrams) {
    const searchText = engramSearchText(engram);
    const hash = hashStatement(searchText);

    let engramEmbedding;
    if (cache[engram.id]?.hash === hash) {
      engramEmbedding = new Float32Array(cache[engram.id].embedding);
    } else {
      const fresh = await embed(searchText);
      if (!fresh) return [];
      engramEmbedding = fresh;
      cache[engram.id] = {
        hash,
        embedding: Array.from(engramEmbedding)
      };
      cacheChanged = true;
    }

    similarities.push({ engram, score: cosineSimilarity(queryEmbedding, engramEmbedding) });
  }

  // Only rewrite the cache file when an embedding was added or refreshed;
  // a search answered entirely from the cache leaves the file untouched.
  if (cacheChanged) saveCache(cachePath, cache);

  similarities.sort((a, b) => b.score - a.score);
  return similarities.slice(0, limit).map((entry) => entry.engram);
}
|
|
384
|
+
|
|
385
|
+
export {
|
|
386
|
+
getSyncStatus,
|
|
387
|
+
sync,
|
|
388
|
+
withLock,
|
|
389
|
+
atomicWrite,
|
|
390
|
+
ftsTokenize,
|
|
391
|
+
engramSearchText,
|
|
392
|
+
ftsScore,
|
|
393
|
+
searchEngrams,
|
|
394
|
+
embed,
|
|
395
|
+
cosineSimilarity,
|
|
396
|
+
embeddingSearch
|
|
397
|
+
};
|