@plur-ai/core 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,348 @@
1
+ // src/fts.ts
2
/**
 * Common English function words that carry no search signal.
 * Tokens found in this set are dropped by `ftsTokenize`.
 */
const STOP_WORDS = /* @__PURE__ */ new Set([
  "the",
  "and",
  "for",
  "that",
  "this",
  "with",
  "from",
  "are",
  "was",
  "were",
  "been",
  "have",
  "has",
  "not",
  "but",
  "its",
  "you",
  "your",
  "can",
  "will",
  "should",
  "would",
  "could",
  "may",
  "might"
]);

/**
 * Split free text into lowercase search tokens.
 *
 * Everything that is not a letter, combining mark, digit, underscore or
 * whitespace is treated as a separator. Tokens of two characters or fewer
 * and tokens listed in `STOP_WORDS` are discarded.
 *
 * @param {string} text - Text to tokenize; `null`/`undefined` yield no tokens.
 * @returns {string[]} Tokens in order of appearance (duplicates preserved).
 */
function ftsTokenize(text) {
  return String(text ?? "")
    .toLowerCase()
    // Unicode-aware character class: the ASCII-only `\w` would split words
    // such as "café" or "zürich" at every accented letter.
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
    .split(/\s+/)
    .filter((word) => word.length > 2 && !STOP_WORDS.has(word));
}
32
/**
 * Flatten the searchable fields of an engram into one space-separated string.
 *
 * Fields are appended in this order: statement, domain (dots replaced by
 * spaces), tags, entity names and types (the type "other" is skipped),
 * temporal bounds, rationale. Absent optional fields are skipped.
 *
 * @param {object} engram - Engram record; only `statement` is read unconditionally.
 * @returns {string} Concatenated search text.
 */
function engramSearchText(engram) {
  const parts = [engram.statement];
  if (engram.domain) {
    parts.push(engram.domain.replace(/\./g, " "));
  }
  // `tags` is guarded like every other optional field so that an engram
  // without a tags array does not throw.
  if (engram.tags?.length > 0) {
    parts.push(engram.tags.join(" "));
  }
  if (engram.entities) {
    for (const entity of engram.entities) {
      parts.push(entity.name);
      if (entity.type !== "other") {
        parts.push(entity.type);
      }
    }
  }
  if (engram.temporal) {
    const { valid_from: validFrom, valid_until: validUntil } = engram.temporal;
    if (validFrom) parts.push(validFrom);
    if (validUntil) parts.push(validUntil);
  }
  if (engram.rationale) {
    parts.push(engram.rationale);
  }
  return parts.join(" ");
}
49
/**
 * Compute an inverse-document-frequency weight for each query token.
 *
 * A token counts as present in an engram when any of the engram's terms
 * contains the token or is contained in it (substring match in either
 * direction). The weight is `max(0, ln(N / (1 + df)))`.
 *
 * @param {object[]} engrams - Corpus of engrams.
 * @param {string[]} queryTokens - Tokens to weight.
 * @returns {Map<string, number>} Token → weight; empty when the corpus is empty.
 */
function computeIdf(engrams, queryTokens) {
  const total = engrams.length;
  if (total === 0) return /* @__PURE__ */ new Map();
  // Deduplicate each engram's terms once, as arrays: substring matching has
  // to iterate anyway, and an exact hit is already covered by `includes`.
  const termLists = engrams.map((engram) => [
    ...new Set(ftsTokenize(engramSearchText(engram)))
  ]);
  const idf = /* @__PURE__ */ new Map();
  for (const token of queryTokens) {
    let documentFrequency = 0;
    for (const terms of termLists) {
      if (terms.some((term) => term.includes(token) || token.includes(term))) {
        documentFrequency++;
      }
    }
    idf.set(token, Math.max(0, Math.log(total / (1 + documentFrequency))));
  }
  return idf;
}
65
/**
 * Score one engram against a tokenized query.
 *
 * A query token matches when any engram term contains it or is contained
 * in it. The score is the matched share of the total token weight. When
 * every weight is zero the score falls back to the unweighted share of
 * matched tokens.
 *
 * @param {object} engram - Engram to score.
 * @param {string[]} queryTokens - Tokenized query.
 * @param {Map<string, number>} [idfWeights] - Per-token weights; a token
 *   without an entry (or no map at all) weighs 1.
 * @returns {number} Score between 0 and 1; 0 for an empty query.
 */
function ftsScore(engram, queryTokens, idfWeights) {
  // Guard first so an empty query never pays for tokenizing the engram.
  if (queryTokens.length === 0) return 0;
  const terms = ftsTokenize(engramSearchText(engram));
  let weightedHits = 0;
  let totalWeight = 0;
  let matches = 0;
  for (const token of queryTokens) {
    const weight = idfWeights?.get(token) ?? 1;
    totalWeight += weight;
    if (terms.some((term) => term.includes(token) || token.includes(term))) {
      weightedHits += weight;
      // Counted in the same pass so the zero-weight fallback needs no rescan.
      matches++;
    }
  }
  if (totalWeight === 0) {
    return matches / queryTokens.length;
  }
  return weightedHits / totalWeight;
}
86
/**
 * Rank engrams against a free-text query using the full-text scorer.
 *
 * @param {object[]} engrams - Candidate engrams.
 * @param {string} query - Raw query text.
 * @param {number} [limit=20] - Maximum number of results.
 * @returns {object[]} Engrams with a positive score, best first.
 */
function searchEngrams(engrams, query, limit = 20) {
  const queryTokens = ftsTokenize(query);
  if (queryTokens.length === 0) {
    return [];
  }
  const idfWeights = computeIdf(engrams, queryTokens);
  const scored = engrams.map((engram) => {
    const score = ftsScore(engram, queryTokens, idfWeights);
    return { engram, score };
  });
  const hits = scored.filter((entry) => entry.score > 0);
  hits.sort((left, right) => right.score - left.score);
  return hits.slice(0, limit).map(({ engram }) => engram);
}
92
+
93
+ // src/embeddings.ts
94
+ import { existsSync as existsSync2, readFileSync, mkdirSync as mkdirSync2 } from "fs";
95
+ import { join as join2 } from "path";
96
+ import { createHash } from "crypto";
97
+
98
+ // src/sync.ts
99
+ import { execFileSync } from "child_process";
100
+ import { existsSync, writeFileSync, renameSync, mkdirSync } from "fs";
101
+ import { join, dirname } from "path";
102
// Contents written to `.gitignore` when a store repository is initialized.
// One pattern per entry; the trailing empty entry produces the final newline.
const GITIGNORE = [
  "# PLUR \u2014 derived/cache files (regenerated automatically)",
  "embeddings/",
  ".embeddings-cache.json",
  "*.db",
  "*.sqlite",
  "exchange/",
  ""
].join("\n");
109
/**
 * Run a git command synchronously and return its trimmed stdout.
 *
 * @param {string[]} args - Arguments passed to the `git` executable.
 * @param {string} cwd - Working directory for the command.
 * @returns {string} Trimmed UTF-8 output.
 * @throws Whatever `execFileSync` throws (non-zero exit, 30 s timeout, spawn failure).
 */
function git(args, cwd) {
  const options = { cwd, encoding: "utf8", timeout: 3e4 };
  const output = execFileSync("git", args, options);
  return output.trim();
}
112
/**
 * Run a git command, converting any failure into `null`.
 *
 * @param {string[]} args - Arguments passed to `git`.
 * @param {string} cwd - Working directory for the command.
 * @returns {string|null} Trimmed output, or `null` when the command threw.
 */
function gitSafe(args, cwd) {
  let output = null;
  try {
    output = git(args, cwd);
  } catch {
    // Best effort by design: callers treat null as "command failed".
  }
  return output;
}
119
/**
 * Report whether `root` contains a `.git` entry.
 *
 * @param {string} root - Directory to inspect.
 * @returns {boolean} True when `<root>/.git` exists.
 */
function isGitRepo(root) {
  const gitEntry = join(root, ".git");
  return existsSync(gitEntry);
}
122
/**
 * Check that a `git` executable can be launched.
 *
 * @returns {boolean} True when `git --version` completes within 5 s without throwing.
 */
function hasGitCli() {
  let available = true;
  try {
    execFileSync("git", ["--version"], { encoding: "utf8", timeout: 5e3 });
  } catch {
    available = false;
  }
  return available;
}
130
/**
 * Look up the URL configured for the `origin` remote.
 *
 * @param {string} root - Repository directory.
 * @returns {string|null} The remote URL, or `null` when the lookup fails.
 */
function getRemote(root) {
  const originUrl = gitSafe(["remote", "get-url", "origin"], root);
  return originUrl;
}
133
/**
 * Report whether `git status --porcelain` lists any change.
 *
 * @param {string} root - Repository directory.
 * @returns {boolean} True when the status output is non-empty; false when
 *   it is empty or the status command failed.
 */
function isDirty(root) {
  const porcelain = gitSafe(["status", "--porcelain"], root);
  if (porcelain === null) {
    return false;
  }
  return porcelain.length > 0;
}
137
/**
 * Count commits that differ between HEAD and its upstream.
 *
 * @param {string} root - Repository directory.
 * @param {string} direction - "ahead" counts commits only on HEAD; any
 *   other value counts commits only on the upstream.
 * @returns {number} Commit count; 0 when there is no upstream or the
 *   count command produced no output.
 */
function countDiff(root, direction) {
  const upstream = gitSafe(["rev-parse", "--abbrev-ref", "@{u}"], root);
  if (!upstream) {
    return 0;
  }
  let sideFlag;
  if (direction === "ahead") {
    sideFlag = "--left-only";
  } else {
    sideFlag = "--right-only";
  }
  const raw = gitSafe(["rev-list", sideFlag, "--count", "HEAD...@{u}"], root);
  if (!raw) {
    return 0;
  }
  return parseInt(raw, 10);
}
144
/**
 * Describe the git sync state of a store directory.
 *
 * When an `origin` remote is configured, a quiet fetch is attempted first
 * so the ahead/behind counts are computed against fetched refs.
 *
 * @param {string} root - Store directory.
 * @returns {{initialized: boolean, remote: (string|null), dirty: boolean,
 *   branch: (string|null), ahead: number, behind: number}} Status snapshot.
 */
function getSyncStatus(root) {
  const initialized = isGitRepo(root);
  if (!initialized) {
    return { initialized: false, remote: null, dirty: false, branch: null, ahead: 0, behind: 0 };
  }
  const branch = gitSafe(["rev-parse", "--abbrev-ref", "HEAD"], root);
  const remote = getRemote(root);
  if (remote) {
    gitSafe(["fetch", "origin", "--quiet"], root);
  }
  const dirty = isDirty(root);
  const ahead = countDiff(root, "ahead");
  const behind = countDiff(root, "behind");
  return { initialized: true, remote, dirty, branch, ahead, behind };
}
160
/**
 * Turn `root` into a git repository with an initial commit.
 *
 * Runs `git init`, writes the `.gitignore`, stages everything and commits.
 *
 * @param {string} root - Directory to initialize.
 * @throws When any git command fails.
 */
function initRepo(root) {
  git(["init"], root);
  const ignorePath = join(root, ".gitignore");
  atomicWrite(ignorePath, GITIGNORE);
  const bootstrapCommands = [
    ["add", "-A"],
    ["commit", "-m", "Initial PLUR engram store"]
  ];
  for (const args of bootstrapCommands) {
    git(args, root);
  }
}
166
/**
 * Stage and commit every pending change in the store.
 *
 * @param {string} root - Repository directory.
 * @returns {number} 0 when the working tree was clean; otherwise the number
 *   of staged paths, or 1 when that number could not be determined.
 * @throws When `git add` or `git commit` fails.
 */
function commitChanges(root) {
  if (!isDirty(root)) return 0;
  git(["add", "-A"], root);
  // Count staged paths from `--name-only` (one path per line) instead of
  // regex-scraping the stat summary: that text is meant for humans, and a
  // stat line for a path containing "<digits> file" would be matched first.
  const staged = gitSafe(["diff", "--cached", "--name-only"], root);
  const now = (/* @__PURE__ */ new Date()).toISOString().slice(0, 19).replace("T", " ");
  git(["commit", "-m", `plur sync ${now}`], root);
  const stagedCount = staged
    ? staged.split("\n").filter((line) => line.length > 0).length
    : 0;
  // A commit was made, so report at least one file even if counting failed.
  return stagedCount > 0 ? stagedCount : 1;
}
175
/**
 * Report whether `git grep` finds a `<<<<<<<` marker in the repository.
 *
 * @param {string} root - Repository directory.
 * @returns {boolean} True when at least one file is listed; false when
 *   nothing is listed or the grep command failed.
 */
function hasConflictMarkers(root) {
  const listing = gitSafe(["grep", "-l", "<<<<<<<"], root);
  if (listing === null) {
    return false;
  }
  return listing.length > 0;
}
179
/**
 * Bring remote commits into the local branch.
 *
 * Tries `pull --rebase` first. On failure the rebase is aborted and a plain
 * merge pull is attempted. If that also fails and conflict markers are
 * present, the merge is aborted and an error is thrown. Otherwise
 * everything is staged and a merge commit is attempted.
 *
 * @param {string} root - Repository directory.
 * @param {string} [branch] - Remote branch to pull. Defaults to the branch
 *   HEAD is on, falling back to "main" when that cannot be determined
 *   (lookup failure or detached HEAD).
 * @returns {boolean} Always true when no error is thrown.
 * @throws {Error} When the merge leaves conflict markers behind.
 */
function pullRebase(root, branch) {
  // The rest of the sync flow pushes whatever branch HEAD is on, so pull the
  // same branch instead of assuming the repository uses "main".
  const head = gitSafe(["rev-parse", "--abbrev-ref", "HEAD"], root);
  const target = branch ?? (head && head !== "HEAD" ? head : "main");
  const result = gitSafe(["pull", "--rebase", "origin", target], root);
  if (result !== null) return true;
  gitSafe(["rebase", "--abort"], root);
  const mergeResult = gitSafe(["pull", "origin", target, "--no-edit"], root);
  if (mergeResult !== null) return true;
  if (hasConflictMarkers(root)) {
    gitSafe(["merge", "--abort"], root);
    throw new Error("Sync conflict: YAML files have merge conflicts that require manual resolution. Your local changes are preserved.");
  }
  git(["add", "-A"], root);
  gitSafe(["commit", "-m", "plur sync: merge conflict resolved (kept both)"], root);
  return true;
}
193
/**
 * Synchronize the store directory through git.
 *
 * Depending on the state of `root` this initializes a repository, attaches
 * a remote, commits locally, or commits + pulls + pushes.
 *
 * @param {string} root - Store directory.
 * @param {string} [remote] - Remote URL to attach as `origin` when the
 *   repository has none yet.
 * @returns {{action: string, message: string, remote: (string|null),
 *   files_changed: number}} Outcome; `action` is one of "initialized",
 *   "synced", "committed" or "up-to-date".
 * @throws {Error} When git is not installed, or when a required git command
 *   or the pull step fails.
 */
function sync(root, remote) {
  if (!hasGitCli()) {
    throw new Error("git is not installed. Install git to enable sync.");
  }

  // Case 1: not a repository yet.
  if (!isGitRepo(root)) {
    initRepo(root);
    if (remote) {
      git(["remote", "add", "origin", remote], root);
      const branch = git(["rev-parse", "--abbrev-ref", "HEAD"], root);
      git(["push", "-u", "origin", branch], root);
      return { action: "initialized", message: `Initialized and pushed to ${remote}`, remote, files_changed: 0 };
    }
    return {
      action: "initialized",
      message: "Initialized local git repo. Call plur.sync with remote to enable cross-device sync.",
      remote: null,
      files_changed: 0
    };
  }

  const existingRemote = getRemote(root);

  // Case 2: repository without a remote, and the caller supplied one.
  if (remote && !existingRemote) {
    git(["remote", "add", "origin", remote], root);
    const filesChanged2 = commitChanges(root);
    const branch = git(["rev-parse", "--abbrev-ref", "HEAD"], root);
    git(["push", "-u", "origin", branch], root);
    return { action: "synced", message: `Remote added and pushed to ${remote}`, remote, files_changed: filesChanged2 };
  }

  // Case 3: local-only repository.
  if (!existingRemote) {
    const filesChanged2 = commitChanges(root);
    if (filesChanged2 === 0) {
      return { action: "up-to-date", message: 'No changes to commit. Add a remote with plur.sync({ remote: "..." }) to enable cross-device sync.', remote: null, files_changed: 0 };
    }
    return { action: "committed", message: `Committed ${filesChanged2} file(s) locally.`, remote: null, files_changed: filesChanged2 };
  }

  // Case 4: repository with a remote — commit, fetch, pull, push.
  const filesChanged = commitChanges(root);
  gitSafe(["fetch", "origin", "--quiet"], root);
  const behind = countDiff(root, "behind");
  const aheadBefore = countDiff(root, "ahead");
  if (behind > 0) {
    pullRebase(root);
  }
  const aheadAfter = countDiff(root, "ahead");
  // Keep the push outcome: the summary must say "pushed" only when a push
  // actually ran and succeeded, and must not hide a failed push.
  let pushAttempted = false;
  let pushSucceeded = false;
  if (aheadAfter > 0) {
    pushAttempted = true;
    pushSucceeded = gitSafe(["push", "origin"], root) !== null;
  }
  if (filesChanged === 0 && behind === 0 && aheadBefore === 0) {
    return { action: "up-to-date", message: "Already in sync.", remote: existingRemote, files_changed: 0 };
  }
  const parts = [];
  if (filesChanged > 0) parts.push(`${filesChanged} file(s) committed`);
  if (behind > 0) parts.push(`pulled ${behind} remote commit(s)`);
  if (pushSucceeded) {
    parts.push("pushed");
  } else if (pushAttempted) {
    parts.push("push failed");
  }
  return {
    action: "synced",
    message: `Synced. ${parts.join(", ")}.`,
    remote: existingRemote,
    files_changed: filesChanged
  };
}
252
/**
 * Write a file by writing a sibling `.tmp` file and renaming it into place.
 *
 * The parent directory is created (recursively) when it does not exist.
 *
 * @param {string} filePath - Destination path.
 * @param {string} content - Data to write.
 */
function atomicWrite(filePath, content) {
  const parentDir = dirname(filePath);
  const parentMissing = !existsSync(parentDir);
  if (parentMissing) {
    mkdirSync(parentDir, { recursive: true });
  }
  const stagingPath = `${filePath}.tmp`;
  writeFileSync(stagingPath, content);
  renameSync(stagingPath, filePath);
}
259
+
260
+ // src/embeddings.ts
261
// Lazily created feature-extraction pipeline, shared by every caller.
let embedPipeline = null;
// Set once loading has failed so later calls skip the attempt.
let transformersUnavailable = false;

/**
 * Return the shared embedding pipeline, creating it on first use.
 *
 * @returns {Promise<Function|null>} The pipeline, or `null` when the
 *   transformers module or the model could not be loaded (now or earlier).
 */
async function getEmbedder() {
  if (transformersUnavailable) {
    return null;
  }
  if (embedPipeline) {
    return embedPipeline;
  }
  try {
    const { pipeline } = await import("./transformers.node-PH5YK5EA.js");
    const options = { dtype: "fp32" };
    embedPipeline = await pipeline("feature-extraction", "Xenova/bge-small-en-v1.5", options);
  } catch {
    // Any load failure disables embeddings for the rest of the process.
    transformersUnavailable = true;
    return null;
  }
  return embedPipeline;
}
278
/**
 * Embed a piece of text with the shared pipeline.
 *
 * The pipeline is called with CLS pooling and normalization enabled.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<Float32Array|null>} The embedding, or `null` when no
 *   embedder is available.
 */
async function embed(text) {
  const embedder = await getEmbedder();
  if (embedder) {
    const output = await embedder(text, { pooling: "cls", normalize: true });
    return new Float32Array(output.data);
  }
  return null;
}
284
/**
 * Cosine similarity of two numeric vectors.
 *
 * The dot product is divided by the product of both magnitudes, so the
 * result is correct for vectors of any length, not only unit vectors.
 * Iteration runs over `a.length`; both vectors are expected to have the
 * same number of components.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Similarity in [-1, 1]; 0 when either vector has zero magnitude.
 */
function cosineSimilarity(a, b) {
  let dot = 0;
  let squaredA = 0;
  let squaredB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    squaredA += a[i] * a[i];
    squaredB += b[i] * b[i];
  }
  const magnitude = Math.sqrt(squaredA) * Math.sqrt(squaredB);
  // A zero vector has no direction; report "no similarity" instead of NaN.
  if (magnitude === 0) return 0;
  return dot / magnitude;
}
289
/**
 * Load the embedding cache from disk.
 *
 * @param {string} cachePath - Path of the JSON cache file.
 * @returns {object} The parsed cache, or an empty object when the file is
 *   missing, unreadable, not valid JSON, or does not hold a plain object.
 */
function loadCache(cachePath) {
  if (!existsSync2(cachePath)) return {};
  try {
    const parsed = JSON.parse(readFileSync(cachePath, "utf8"));
    // Valid JSON is not necessarily a usable cache: `null` would throw on
    // property access, and an array would lose string keys when re-serialized.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      return {};
    }
    return parsed;
  } catch {
    // A corrupt cache is only a cache miss; entries are recomputed.
    return {};
  }
}
297
/**
 * Persist the embedding cache as JSON.
 *
 * @param {string} cachePath - Destination file.
 * @param {object} cache - Cache object to serialize.
 */
function saveCache(cachePath, cache) {
  // `atomicWrite` derives the parent directory with `dirname` and creates it
  // when missing. Slicing at the last "/" here produced an empty directory
  // name for a bare filename or a backslash-separated path, which made the
  // directory creation throw.
  atomicWrite(cachePath, JSON.stringify(cache));
}
302
/**
 * Fingerprint a piece of text.
 *
 * @param {string} statement - Text to hash.
 * @returns {string} First 16 hex characters of the SHA-256 digest.
 */
function hashStatement(statement) {
  const hasher = createHash("sha256");
  hasher.update(statement);
  const hex = hasher.digest("hex");
  return hex.substring(0, 16);
}
305
/**
 * Rank engrams by embedding similarity to a query.
 *
 * Engram embeddings are cached on disk, keyed by engram id and validated
 * with a hash of the engram's search text.
 *
 * @param {object[]} engrams - Candidate engrams.
 * @param {string} query - Query text.
 * @param {number} limit - Maximum number of results.
 * @param {string} [storagePath] - Directory holding `.embeddings-cache.json`;
 *   when omitted the cache path is relative to the working directory.
 * @returns {Promise<object[]>} Engrams ordered by descending similarity, or
 *   an empty array when there are no engrams or embedding is unavailable.
 */
async function embeddingSearch(engrams, query, limit, storagePath) {
  if (engrams.length === 0) return [];
  // Embed the query first: without an embedder there is no reason to read
  // the cache file at all.
  const queryEmbedding = await embed(query);
  if (!queryEmbedding) {
    return [];
  }
  const cachePath = storagePath ? join2(storagePath, ".embeddings-cache.json") : ".embeddings-cache.json";
  const cache = loadCache(cachePath);
  let cacheChanged = false;
  const similarities = [];
  for (const engram of engrams) {
    const searchText = engramSearchText(engram);
    const hash = hashStatement(searchText);
    let engramEmbedding;
    if (cache[engram.id]?.hash === hash) {
      engramEmbedding = new Float32Array(cache[engram.id].embedding);
    } else {
      const emb = await embed(searchText);
      if (!emb) return [];
      engramEmbedding = emb;
      cache[engram.id] = {
        hash,
        embedding: Array.from(engramEmbedding)
      };
      cacheChanged = true;
    }
    const score = cosineSimilarity(queryEmbedding, engramEmbedding);
    similarities.push({ engram, score });
  }
  // Rewrite the cache file only when an embedding was added or refreshed.
  if (cacheChanged) {
    saveCache(cachePath, cache);
  }
  similarities.sort((a, b) => b.score - a.score);
  return similarities.slice(0, limit).map((s) => s.engram);
}
336
+
337
+ export {
338
+ getSyncStatus,
339
+ sync,
340
+ atomicWrite,
341
+ ftsTokenize,
342
+ engramSearchText,
343
+ ftsScore,
344
+ searchEngrams,
345
+ embed,
346
+ cosineSimilarity,
347
+ embeddingSearch
348
+ };
@@ -0,0 +1,11 @@
1
+ import {
2
+ cosineSimilarity,
3
+ embed,
4
+ embeddingSearch
5
+ } from "./chunk-WPD4MPTT.js";
6
+ import "./chunk-2ZDO52B4.js";
7
+ export {
8
+ cosineSimilarity,
9
+ embed,
10
+ embeddingSearch
11
+ };