@plur-ai/core 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,452 @@
1
+ import {
2
+ atomicWrite
3
+ } from "./chunk-MY4XVDCE.js";
4
+
5
+ // src/logger.ts
6
+ var level = process.env.PLUR_LOG_LEVEL || "warning";
7
+ var levels = { debug: 0, info: 1, warning: 2, error: 3 };
8
+ var threshold = levels[level] ?? 2;
9
+ var logger = {
10
+ debug: (...args) => {
11
+ if (threshold <= 0) console.error("[plur:debug]", ...args);
12
+ },
13
+ info: (...args) => {
14
+ if (threshold <= 1) console.error("[plur:info]", ...args);
15
+ },
16
+ warning: (...args) => {
17
+ if (threshold <= 2) console.error("[plur:warning]", ...args);
18
+ },
19
+ error: (...args) => {
20
+ if (threshold <= 3) console.error("[plur:error]", ...args);
21
+ }
22
+ };
23
+
24
+ // src/engrams.ts
25
+ import * as fs from "fs";
26
+ import * as yaml from "js-yaml";
27
+
28
+ // src/schemas/engram.ts
29
+ import { z } from "zod";
30
// Small builders for field shapes that repeat across the schemas below.
// Each call returns a fresh zod schema instance.
const unitInterval = () => z.number().min(0).max(1);
const intCounter = () => z.number().int().default(0);
const stringList = () => z.array(z.string()).default([]);
const tenPointScale = () => z.number().int().min(1).max(10).default(5);

// Allowed values for the enum-typed fields.
const MEMORY_CLASSES = ["semantic", "episodic", "procedural", "metacognitive"];
const COGNITIVE_LEVELS = ["remember", "understand", "apply", "analyze", "evaluate", "create"];
const ANCHOR_RELEVANCE = ["primary", "supporting", "example"];
const ASSOCIATION_TARGETS = ["engram", "document"];
const ASSOCIATION_TYPES = ["semantic", "temporal", "causal", "co_accessed"];
const ENTITY_TYPES = [
  "person",
  "organization",
  "technology",
  "concept",
  "project",
  "tool",
  "place",
  "event",
  "standard",
  "other"
];

/** Activation bookkeeping: two strengths in [0, 1], an access count, and a last-access string. */
var ActivationSchema = z.object({
  retrieval_strength: unitInterval(),
  storage_strength: unitInterval(),
  frequency: z.number().int().min(0),
  last_accessed: z.string()
});

/** Classification of an engram by memory class and cognitive level. */
var KnowledgeTypeSchema = z.object({
  memory_class: z.enum(MEMORY_CLASSES),
  cognitive_level: z.enum(COGNITIVE_LEVELS)
});

/** Pointer to a path, with an optional snippet of at most 200 characters. */
var KnowledgeAnchorSchema = z.object({
  path: z.string(),
  relevance: z.enum(ANCHOR_RELEVANCE).default("supporting"),
  snippet: z.string().max(200).optional(),
  snippet_extracted_at: z.string().optional()
});

/** Weighted link to another engram or a document; strength is capped at 0.95. */
var AssociationSchema = z.object({
  target_type: z.enum(ASSOCIATION_TARGETS),
  target: z.string(),
  strength: z.number().min(0).max(0.95),
  type: z.enum(ASSOCIATION_TYPES),
  updated_at: z.string().optional()
});

/** Example and/or analogy; validation fails when both are missing or empty. */
var DualCodingSchema = z.object({
  example: z.string().optional(),
  analogy: z.string().optional()
}).refine(
  (d) => d.example || d.analogy,
  "At least one of example or analogy must be provided"
);

/** Four lists of related identifiers, each defaulting to an empty list. */
var RelationsSchema = z.object({
  broader: stringList(),
  narrower: stringList(),
  related: stringList(),
  conflicts: stringList()
});

/** Origin, derivation chain, optional signature, and license (default "cc-by-sa-4.0"). */
var ProvenanceSchema = z.object({
  origin: z.string(),
  chain: stringList(),
  signature: z.string().nullable().default(null),
  license: z.string().default("cc-by-sa-4.0")
});

/** Integer tallies of positive, negative, and neutral feedback, each defaulting to 0. */
var FeedbackSignalsSchema = z.object({
  positive: intCounter(),
  negative: intCounter(),
  neutral: intCounter()
});

/** Named entity with a type from a fixed list and an optional URL. */
var EntityRefSchema = z.object({
  name: z.string(),
  type: z.enum(ENTITY_TYPES),
  uri: z.string().url().optional()
});

/** Timestamps stored as plain strings; only learned_at is required. */
var TemporalSchema = z.object({
  learned_at: z.string(),
  valid_from: z.string().optional(),
  valid_until: z.string().optional(),
  ingested_at: z.string().optional()
});

/** Integer usage counters, each defaulting to 0, plus an optional last-hit string. */
var UsageStatsSchema = z.object({
  injections: intCounter(),
  hits: intCounter(),
  misses: intCounter(),
  last_hit_at: z.string().optional()
});

/** Episodic context: two integer 1-10 ratings (default 5) and optional references. */
var EpisodicFieldsSchema = z.object({
  emotional_weight: tenPointScale(),
  confidence: tenPointScale(),
  trigger_context: z.string().optional(),
  journal_ref: z.string().optional()
});

/** Reference to a prior version: an event id and a change timestamp string. */
var PreviousVersionRefSchema = z.object({
  event_id: z.string(),
  changed_at: z.string()
});

/** Exchange metrics: optional fitness score, two integer counters, and a contradiction rate in [0, 1]. */
var ExchangeMetadataSchema = z.object({
  fitness_score: unitInterval().optional(),
  environmental_diversity: intCounter(),
  adoption_count: intCounter(),
  contradiction_rate: unitInterval().default(0)
});
121
/**
 * Schema for a single engram record.
 *
 * Most fields carry defaults, so a minimal entry needs only `id`, `status`,
 * `type`, `scope` and `statement`.
 */
var EngramSchema = z.object({
  // Identity
  id: z.string().regex(/^(ENG|ABS|META)-[A-Za-z0-9-]+$/),
  version: z.number().int().min(1).default(2),
  status: z.enum(["active", "dormant", "retired", "candidate"]),
  consolidated: z.boolean().default(false),
  type: z.enum(["behavioral", "terminological", "procedural", "architectural"]),
  scope: z.string(),
  visibility: z.enum(["private", "public", "template"]).default("private"),
  // Content
  statement: z.string().min(1),
  rationale: z.string().optional(),
  contraindications: z.array(z.string()).optional(),
  // Lineage
  source: z.string().optional(),
  source_patterns: z.array(z.string()).optional(),
  derivation_count: z.number().int().min(0).default(1),
  pack: z.string().nullable().default(null),
  abstract: z.string().nullable().default(null),
  derived_from: z.string().nullable().default(null),
  // Classification
  knowledge_type: KnowledgeTypeSchema.optional(),
  domain: z.string().optional(),
  tags: z.array(z.string()).default([]),
  // Activation (ACT-R model)
  // The default is a function so `last_accessed` is stamped with the date at
  // parse time. A plain object literal would be evaluated once at import and
  // go stale in a long-running process.
  activation: ActivationSchema.default(() => ({
    retrieval_strength: 0.7,
    storage_strength: 1,
    frequency: 0,
    last_accessed: new Date().toISOString().slice(0, 10)
  })),
  // Relations & grounding
  relations: RelationsSchema.optional(),
  associations: z.array(AssociationSchema).default([]),
  knowledge_anchors: z.array(KnowledgeAnchorSchema).default([]),
  dual_coding: DualCodingSchema.optional(),
  // Provenance
  provenance: ProvenanceSchema.optional(),
  // Feedback
  feedback_signals: FeedbackSignalsSchema.default({ positive: 0, negative: 0, neutral: 0 }),
  // === NEW OPTIONAL FIELDS (v2.1) ===
  /** Typed entity references extracted from statement. Enables graph queries. */
  entities: z.array(EntityRefSchema).optional(),
  /** Temporal validity window. When is this knowledge true? */
  temporal: TemporalSchema.optional(),
  /** Automatic usage tracking. Injections, hits, misses. */
  usage: UsageStatsSchema.optional(),
  /** Episodic context: emotional weight, confidence, trigger. */
  episodic: EpisodicFieldsSchema.optional(),
  /** Exchange marketplace metadata: fitness, adoption, diversity. */
  exchange: ExchangeMetadataSchema.optional(),
  /** Extensible key-value data for domain-specific fields. */
  structured_data: z.record(z.string(), z.unknown()).optional(),
  /** Polarity classification: 'do' for directives, 'dont' for prohibitions, null for unclassified. */
  polarity: z.enum(["do", "dont"]).nullable().default(null),
  // === SP1: Memory Intelligence fields ===
  content_hash: z.string().optional(),
  commitment: z.enum(["exploring", "leaning", "decided", "locked"]).optional(),
  locked_at: z.string().optional(),
  locked_reason: z.string().optional(),
  // === SP2: History & Evolution fields ===
  engram_version: z.number().int().min(1).default(1),
  previous_version_ref: PreviousVersionRefSchema.optional(),
  episode_ids: z.array(z.string()).default([]),
  // === SP3: Retrieval & Injection fields ===
  summary: z.string().max(80).optional()
});

/** Same schema, but keys not declared above are kept on the parsed result instead of being stripped. */
var EngramSchemaPassthrough = EngramSchema.passthrough();
189
+
190
+ // src/schemas/pack.ts
191
+ import { z as z2 } from "zod";
192
// Optional `metadata` section of a pack manifest; every field is optional or defaulted.
var packMetadataSchema = z2.object({
  id: z2.string().optional(),
  injection_policy: z2.enum(["on_match", "on_request", "always"]).default("on_match"),
  match_terms: z2.array(z2.string()).default([]),
  domain: z2.string().optional(),
  engram_count: z2.number().optional()
});

// Optional `x-datacore` section. Unlike `metadata`, it requires id,
// injection_policy and engram_count, and does not accept the "always" policy.
var packDatacoreSchema = z2.object({
  id: z2.string(),
  injection_policy: z2.enum(["on_match", "on_request"]),
  match_terms: z2.array(z2.string()).default([]),
  domain: z2.string().optional(),
  engram_count: z2.number().int().min(0)
});

/** Schema for a pack manifest; only `name` and `version` are required. */
var PackManifestSchema = z2.object({
  name: z2.string(),
  version: z2.string(),
  description: z2.string().optional(),
  creator: z2.string().optional(),
  license: z2.string().default("cc-by-sa-4.0"),
  tags: z2.array(z2.string()).default([]),
  metadata: packMetadataSchema.optional(),
  "x-datacore": packDatacoreSchema.optional()
});
214
+
215
+ // src/engrams.ts
216
/**
 * Reads a YAML file whose top-level `engrams` key holds a list of engram records.
 *
 * Entries that fail schema validation are dropped, and a single warning
 * reports how many. A missing file, a document without an `engrams` array,
 * or a read/parse failure all yield an empty array; the last is also logged
 * as an error.
 *
 * @param filePath path of the YAML file
 * @returns the entries that passed validation, in file order
 */
function loadEngrams(filePath) {
  if (!fs.existsSync(filePath)) return [];
  try {
    const doc = yaml.load(fs.readFileSync(filePath, "utf8"));
    const entries = doc?.engrams;
    if (!entries || !Array.isArray(entries)) return [];

    const outcomes = entries.map((entry) => EngramSchemaPassthrough.safeParse(entry));
    const valid = outcomes.filter((outcome) => outcome.success).map((outcome) => outcome.data);
    const skipped = outcomes.length - valid.length;
    if (skipped > 0) logger.warning(`Skipped ${skipped} invalid engram(s) in ${filePath}`);
    return valid;
  } catch (err) {
    logger.error(`Failed to parse engrams file ${filePath}: ${err}`);
    return [];
  }
}
235
/**
 * Serializes the engrams as YAML under a top-level `engrams` key and hands
 * the text to `atomicWrite` for the given path.
 *
 * @param filePath destination file
 * @param engrams records to persist
 */
function saveEngrams(filePath, engrams) {
  // Dump settings: 120-column lines, no YAML anchors/aliases, double quotes when quoting.
  const dumpOptions = { lineWidth: 120, noRefs: true, quotingType: '"' };
  atomicWrite(filePath, yaml.dump({ engrams }, dumpOptions));
}
239
/**
 * Extracts and parses the YAML frontmatter at the top of a markdown file.
 *
 * The frontmatter is the text between an opening `---` line at the very
 * start of the file and the next `---` line. A leading UTF-8 byte-order mark
 * and CRLF line endings are both accepted, so files saved on Windows are not
 * rejected as having no frontmatter.
 *
 * @param filePath markdown file to read
 * @returns the parsed YAML value; it is not validated here
 * @throws Error when the file does not start with a frontmatter block
 */
function parseSkillMdFrontmatter(filePath) {
  const content = fs.readFileSync(filePath, "utf8");
  const match = content.match(/^\uFEFF?---\r?\n([\s\S]*?)\r?\n---/);
  if (!match) throw new Error(`No frontmatter found in ${filePath}`);
  return yaml.load(match[1]);
}
245
/**
 * Loads one pack directory: its manifest and its engrams.
 *
 * The manifest is taken from the frontmatter of `SKILL.md` when that file
 * exists, otherwise from `manifest.yaml`. Engrams come from `engrams.yaml`.
 *
 * @param packDir directory of the pack
 * @returns `{ manifest, engrams }`
 * @throws Error when neither manifest file exists; schema validation errors
 *   from the manifest also propagate
 */
function loadPack(packDir) {
  const inPack = (fileName) => `${packDir}/${fileName}`;
  const skillMdPath = inPack("SKILL.md");
  const manifestYamlPath = inPack("manifest.yaml");
  const engramsPath = inPack("engrams.yaml");

  // SKILL.md takes precedence over manifest.yaml when both are present.
  const readRawManifest = () => {
    if (fs.existsSync(skillMdPath)) return parseSkillMdFrontmatter(skillMdPath);
    if (fs.existsSync(manifestYamlPath)) return yaml.load(fs.readFileSync(manifestYamlPath, "utf8"));
    throw new Error(`No SKILL.md or manifest.yaml found in ${packDir}`);
  };

  const manifest = PackManifestSchema.parse(readRawManifest());
  const engrams = loadEngrams(engramsPath);
  return { manifest, engrams };
}
261
/**
 * Loads every pack found directly under `packsDir`.
 *
 * An entry counts as a pack when it is a directory containing `SKILL.md` or
 * `manifest.yaml`. Any failure on a single entry is logged as a warning and
 * that entry is skipped, so one bad entry cannot prevent the others from
 * loading. This covers an entry that cannot be inspected, such as a dangling
 * symlink or an unreadable path.
 *
 * @param packsDir directory whose children are pack directories
 * @returns the packs that loaded successfully; empty when `packsDir` does not exist
 */
function loadAllPacks(packsDir) {
  if (!fs.existsSync(packsDir)) return [];
  const packs = [];
  for (const entry of fs.readdirSync(packsDir)) {
    const packDir = `${packsDir}/${entry}`;
    try {
      // statSync throws on dangling symlinks and permission errors, so it
      // belongs inside the per-entry guard.
      if (!fs.statSync(packDir).isDirectory()) continue;
      if (!fs.existsSync(`${packDir}/SKILL.md`) && !fs.existsSync(`${packDir}/manifest.yaml`)) continue;
      packs.push(loadPack(packDir));
    } catch (err) {
      logger.warning(`Failed to load pack ${entry}: ${err}`);
    }
  }
  return packs;
}
276
/**
 * Derives a three-character upper-case prefix from a scope string.
 *
 * The scope is split on `:`, `-`, `_`, `.` and `/`.
 * - Two or more parts: first character of the first part, followed by the
 *   first two characters of the second part (its first character is repeated
 *   when it has only one).
 * - One part of three or more characters: its first, middle and last character.
 * - One shorter part: its characters, padded by repeating the first.
 * - No parts at all (empty or separator-only scope): the placeholder "XXX".
 *   Without this case an empty scope throws and a separator-only scope
 *   returns the separators themselves.
 *
 * @param scope scope string
 * @returns three-character prefix
 */
function storePrefix(scope) {
  const parts = scope.split(/[:\-_./]/).filter(Boolean);
  if (parts.length === 0) return "XXX";
  if (parts.length >= 2) {
    const second = parts[1];
    return (parts[0][0] + second[0] + (second[1] || second[0])).toUpperCase();
  }
  const word = parts[0];
  if (word.length >= 3) {
    return (word[0] + word[Math.floor(word.length / 2)] + word[word.length - 1]).toUpperCase();
  }
  return (word[0] + (word[1] || word[0]) + (word[2] || word[0])).toUpperCase();
}
286
/**
 * Produces the next engram id for today in the form `ENG-YYYY-MMDD-NNN`.
 *
 * The date comes from `toISOString()` and is therefore UTC. The sequence
 * number is one more than the highest numeric suffix among existing ids that
 * share today's prefix, or 1 when there is none; it is zero-padded to at
 * least three digits.
 *
 * @param existing objects with a string `id`
 * @returns the new id
 */
function generateEngramId(existing) {
  const [year, month, day] = new Date().toISOString().slice(0, 10).split("-");
  const prefix = `ENG-${year}-${month}${day}-`;

  let highest = -Infinity;
  for (const engram of existing) {
    if (!engram.id.startsWith(prefix)) continue;
    const seq = parseInt(engram.id.slice(prefix.length), 10);
    if (!isNaN(seq) && seq > highest) highest = seq;
  }

  const next = highest === -Infinity ? 1 : highest + 1;
  return `${prefix}${String(next).padStart(3, "0")}`;
}
294
+
295
+ // src/history.ts
296
+ import * as fs2 from "fs";
297
+ import { join } from "path";
298
/**
 * Appends one event as a JSON line to `<root>/history/<YYYY-MM>.jsonl`.
 *
 * The file name is the first seven characters of `event.timestamp`. The
 * history directory is created when it does not exist yet.
 *
 * @param root storage root directory
 * @param event event object with a string `timestamp`
 */
function appendHistory(root, event) {
  const historyDir = join(root, "history");
  const dirMissing = !fs2.existsSync(historyDir);
  if (dirMissing) fs2.mkdirSync(historyDir, { recursive: true });

  const yearMonth = event.timestamp.slice(0, 7);
  const target = join(historyDir, `${yearMonth}.jsonl`);
  fs2.appendFileSync(target, `${JSON.stringify(event)}\n`, "utf8");
}
308
/**
 * Reads all events stored for one month from `<root>/history/<yearMonth>.jsonl`.
 *
 * Blank lines are ignored. Lines that are not valid JSON are skipped so one
 * corrupt record does not hide the rest of the month. The number skipped is
 * reported through a single warning instead of being dropped silently.
 *
 * @param root storage root directory
 * @param yearMonth file stem, e.g. "2024-03"
 * @returns parsed events in file order; empty when the file does not exist
 */
function readHistory(root, yearMonth) {
  const filePath = join(root, "history", `${yearMonth}.jsonl`);
  if (!fs2.existsSync(filePath)) return [];
  const content = fs2.readFileSync(filePath, "utf8");
  const lines = content.split("\n").filter((l) => l.trim().length > 0);
  const events = [];
  let malformed = 0;
  for (const line of lines) {
    try {
      events.push(JSON.parse(line));
    } catch {
      malformed++;
    }
  }
  if (malformed > 0) logger.warning(`Skipped ${malformed} malformed history line(s) in ${filePath}`);
  return events;
}
322
/**
 * Lists the stems of all `.jsonl` files in `<root>/history`, sorted ascending
 * with the default string sort.
 *
 * Only the trailing `.jsonl` extension is removed. Replacing the first
 * occurrence would mangle a name that contains `.jsonl` earlier on.
 *
 * @param root storage root directory
 * @returns sorted file stems; empty when the history directory does not exist
 */
function listHistoryMonths(root) {
  const historyDir = join(root, "history");
  if (!fs2.existsSync(historyDir)) return [];
  const extension = ".jsonl";
  return fs2
    .readdirSync(historyDir)
    .filter((f) => f.endsWith(extension))
    .map((f) => f.slice(0, -extension.length))
    .sort();
}
327
/**
 * Collects every stored event whose `engram_id` equals `engramId`.
 *
 * Months are visited in the order returned by `listHistoryMonths`, and
 * events keep their order within each month.
 *
 * @param root storage root directory
 * @param engramId id to match against `event.engram_id`
 * @returns matching events
 */
function readHistoryForEngram(root, engramId) {
  return listHistoryMonths(root).flatMap((month) =>
    readHistory(root, month).filter((event) => event.engram_id === engramId)
  );
}
340
/**
 * Creates an event id of the form `EVT-<epoch millis>-<4 base-36 chars>`.
 *
 * The base-36 rendering of `Math.random()` can have fewer than four digits
 * after the point (0.5 renders as "0.i"), or none at all for 0. The suffix
 * is therefore right-padded with "0" to keep a fixed width.
 *
 * The suffix comes from `Math.random()` and only reduces collisions between
 * ids created in the same millisecond; it is not cryptographically strong.
 *
 * @returns the new event id
 */
function generateEventId() {
  const ts = Date.now();
  const rand = Math.random().toString(36).slice(2, 6).padEnd(4, "0");
  return `EVT-${ts}-${rand}`;
}
345
+
346
+ // src/content-hash.ts
347
+ import { createHash } from "crypto";
348
/**
 * Canonicalizes a statement for comparison and hashing: lower-cases it,
 * removes punctuation and symbols, collapses whitespace runs to one space,
 * and trims.
 *
 * Letters, combining marks and digits from every script are kept, along with
 * `_`. The ASCII-only `\w` class would strip all non-Latin text, so
 * statements written in CJK or Cyrillic, for example, would all normalize to
 * the empty string.
 *
 * NOTE(review): output is unchanged for pure-ASCII input but differs for
 * statements containing non-ASCII letters, so values previously derived from
 * such statements will not match. Confirm whether stored hashes need
 * recomputing.
 *
 * @param statement raw statement text
 * @returns normalized text
 */
function normalizeStatement(statement) {
  return statement
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, "")
    .replace(/\s+/g, " ")
    .trim();
}
351
/**
 * Returns the hex-encoded SHA-256 digest of the normalized statement.
 *
 * Two statements that normalize to the same text get the same hash.
 *
 * @param statement raw statement text
 * @returns 64-character lowercase hex digest
 */
function computeContentHash(statement) {
  const hasher = createHash("sha256");
  hasher.update(normalizeStatement(statement));
  return hasher.digest("hex");
}
355
+
356
+ // src/dedup.ts
357
/**
 * Builds the prompt asking a model to classify one new statement against a
 * list of existing engrams as ADD, UPDATE, MERGE or NOOP.
 *
 * The response format requested at the end of the prompt is the one
 * `parseDedupResponse` reads.
 *
 * @param newStatement statement being considered
 * @param candidates existing engrams; `id`, `type`, `statement` and the
 *   optional `domain` are used
 * @returns the prompt text
 */
function buildDedupPrompt(newStatement, candidates) {
  // One numbered two-line entry per candidate: header line, then the quoted statement.
  const describeCandidate = (c, i) => {
    const domainNote = c.domain ? ", domain: " + c.domain : "";
    return `${i + 1}. [${c.id}] (${c.type}${domainNote})\n"${c.statement}"`;
  };
  const candidateList = candidates.map(describeCandidate).join("\n");

  const promptLines = [
    "You are a memory deduplication system. Compare a new memory statement against existing ones.",
    "",
    "NEW STATEMENT:",
    `"${newStatement}"`,
    "",
    "EXISTING ENGRAMS:",
    candidateList,
    "",
    "For each existing engram, answer:",
    "1. RELATIONSHIP: Is the new statement a DUPLICATE (same meaning), EVOLUTION (updated version of same knowledge), COMPLEMENTARY (related but different angle), CONTRADICTORY (opposing), or UNRELATED?",
    "2. RICHNESS: Does the new statement contain more specific, actionable information than the existing one? (yes/no)",
    "",
    "Then give your OVERALL DECISION (exactly one):",
    "- NOOP: New statement is an exact duplicate of an existing engram. Return the ID.",
    "- UPDATE: New statement is an evolution with MORE information. Return the ID to update.",
    "- MERGE: New statement and an existing one are complementary \u2014 combining them preserves both. Return the ID to merge with.",
    "- ADD: New statement is genuinely new knowledge.",
    "",
    "Also check: Does the new statement CONTRADICT any existing engram? If yes, list the conflicting IDs.",
    "",
    "Respond in this exact format:",
    "DECISION: <ADD|UPDATE|MERGE|NOOP>",
    'TARGET: <engram ID if UPDATE/MERGE/NOOP, or "none" if ADD>',
    'CONFLICTS: <comma-separated IDs, or "none">',
    "REASON: <one sentence explanation>"
  ];
  return promptLines.join("\n");
}
388
/**
 * Builds the prompt asking a model to classify several new statements at
 * once against the existing engrams, answering with one `STATEMENT_<n>:`
 * block per statement.
 *
 * @param statements new statement texts
 * @param existingEngrams existing engrams; `id`, `type`, `statement` and the
 *   optional `domain` are used
 * @returns the prompt text
 */
function buildBatchDedupPrompt(statements, existingEngrams) {
  const numberedStatements = statements.map((text, idx) => `${idx + 1}. "${text}"`);
  const numberedEngrams = existingEngrams.map((engram, idx) => {
    const domainNote = engram.domain ? ", domain: " + engram.domain : "";
    return `${idx + 1}. [${engram.id}] (${engram.type}${domainNote})\n"${engram.statement}"`;
  });

  const promptLines = [
    "You are a memory deduplication system. Compare NEW statements against existing engrams.",
    "",
    "NEW STATEMENTS:",
    numberedStatements.join("\n"),
    "",
    "EXISTING ENGRAMS:",
    numberedEngrams.join("\n"),
    "",
    "For each NEW statement, decide:",
    "- NOOP: Exact duplicate of an existing engram.",
    "- UPDATE: Evolution with more info than existing.",
    "- MERGE: Complementary with existing \u2014 combine.",
    "- ADD: Genuinely new knowledge.",
    "",
    "Also flag any CONTRADICTIONS.",
    "",
    "Respond with one block per new statement:",
    "STATEMENT_1:",
    "DECISION: <ADD|UPDATE|MERGE|NOOP>",
    'TARGET: <engram ID or "none">',
    'CONFLICTS: <comma-separated IDs or "none">',
    "",
    "STATEMENT_2:",
    "..."
  ];
  return promptLines.join("\n");
}
419
/**
 * Parses a model reply in the `DECISION / TARGET / CONFLICTS / REASON` format.
 *
 * Only the first occurrence of each label is read. Missing fields fall back
 * to decision "ADD", no target, no conflicts and an empty reason.
 *
 * "none" is recognized after stripping decoration and regardless of case, so
 * `"none"`, `None` and `none.` all mean "no id". An exact match on the raw
 * text would return the word itself as an engram id. A field left empty is
 * treated as absent, so the label on the following line is never taken as
 * its value.
 *
 * @param response raw model output
 * @returns `{ decision, target_id, conflicts, reason }`; `target_id` is null
 *   when there is no target
 */
function parseDedupResponse(response) {
  // A captured "value" that is itself a field label means the real field was empty.
  const nextLabel = /^(DECISION|TARGET|CONFLICTS|REASON|STATEMENT_\d+)\s*:/i;
  const readField = (label) => {
    const value = response.match(new RegExp(`${label}:\\s*([^\\n]+)`, "i"))?.[1]?.trim();
    return value === undefined || nextLabel.test(value) ? undefined : value;
  };
  // Ids are reduced to letters, digits and hyphens; brackets, quotes and so on are dropped.
  const toId = (text) => text.replace(/[^A-Za-z0-9-]/g, "");
  const isNone = (id) => id === "" || id.toLowerCase() === "none";

  const decisionMatch = response.match(/DECISION:\s*(ADD|UPDATE|MERGE|NOOP)/i);
  const decision = decisionMatch?.[1]?.toUpperCase() ?? "ADD";

  const targetId = toId(readField("TARGET") ?? "none");
  const target_id = isNone(targetId) ? null : targetId;

  const conflicts = (readField("CONFLICTS") ?? "none")
    .split(",")
    .map((part) => toId(part.trim()))
    .filter((id) => !isNone(id));

  const reason = readField("REASON") ?? "";
  return { decision, target_id, conflicts, reason };
}
432
+
433
+ export {
434
+ EngramSchemaPassthrough,
435
+ logger,
436
+ loadEngrams,
437
+ saveEngrams,
438
+ loadPack,
439
+ loadAllPacks,
440
+ storePrefix,
441
+ generateEngramId,
442
+ appendHistory,
443
+ readHistory,
444
+ listHistoryMonths,
445
+ readHistoryForEngram,
446
+ generateEventId,
447
+ normalizeStatement,
448
+ computeContentHash,
449
+ buildDedupPrompt,
450
+ buildBatchDedupPrompt,
451
+ parseDedupResponse
452
+ };
@@ -1,110 +1,3 @@
1
- // src/fts.ts
2
- var STOP_WORDS = /* @__PURE__ */ new Set([
3
- "the",
4
- "and",
5
- "for",
6
- "that",
7
- "this",
8
- "with",
9
- "from",
10
- "are",
11
- "was",
12
- "were",
13
- "been",
14
- "have",
15
- "has",
16
- "not",
17
- "but",
18
- "its",
19
- "you",
20
- "your",
21
- "can",
22
- "will",
23
- "should",
24
- "would",
25
- "could",
26
- "may",
27
- "might"
28
- ]);
29
- function ftsTokenize(text) {
30
- return text.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((w) => w.length > 2).filter((w) => !STOP_WORDS.has(w));
31
- }
32
- function engramSearchText(engram) {
33
- const parts = [engram.statement];
34
- if (engram.domain) parts.push(engram.domain.replace(/\./g, " "));
35
- if (engram.tags.length > 0) parts.push(engram.tags.join(" "));
36
- if (engram.entities) {
37
- for (const e of engram.entities) {
38
- parts.push(e.name);
39
- if (e.type !== "other") parts.push(e.type);
40
- }
41
- }
42
- if (engram.temporal) {
43
- if (engram.temporal.valid_from) parts.push(engram.temporal.valid_from);
44
- if (engram.temporal.valid_until) parts.push(engram.temporal.valid_until);
45
- }
46
- if (engram.rationale) parts.push(engram.rationale);
47
- return parts.join(" ");
48
- }
49
- function computeIdf(engrams, queryTokens) {
50
- const N = engrams.length;
51
- if (N === 0) return /* @__PURE__ */ new Map();
52
- const engramTermSets = engrams.map((e) => new Set(ftsTokenize(engramSearchText(e))));
53
- const idf = /* @__PURE__ */ new Map();
54
- for (const qt of queryTokens) {
55
- let df = 0;
56
- for (const termSet of engramTermSets) {
57
- if (termSet.has(qt) || Array.from(termSet).some((t) => t.includes(qt) || qt.includes(t))) {
58
- df++;
59
- }
60
- }
61
- idf.set(qt, Math.max(0, Math.log(N / (1 + df))));
62
- }
63
- return idf;
64
- }
65
- var BM25_K1 = 1.2;
66
- var BM25_B = 0.75;
67
- function ftsScore(engram, queryTokens, idfWeights, avgDocLength) {
68
- const allTerms = ftsTokenize(engramSearchText(engram));
69
- if (queryTokens.length === 0) return 0;
70
- const docLen = allTerms.length;
71
- const avgdl = avgDocLength && avgDocLength > 0 ? avgDocLength : docLen;
72
- const hasNonZeroIdf = idfWeights && Array.from(idfWeights.values()).some((v) => v > 0);
73
- let score = 0;
74
- for (const qt of queryTokens) {
75
- let effectiveIdf;
76
- if (!idfWeights) {
77
- effectiveIdf = 1;
78
- } else if (hasNonZeroIdf) {
79
- effectiveIdf = idfWeights.get(qt) ?? 0;
80
- if (effectiveIdf === 0) continue;
81
- } else {
82
- effectiveIdf = 1;
83
- }
84
- let tf = 0;
85
- for (const t of allTerms) {
86
- if (t.includes(qt) || qt.includes(t)) tf++;
87
- }
88
- if (tf === 0) continue;
89
- const numerator = tf * (BM25_K1 + 1);
90
- const denominator = tf + BM25_K1 * (1 - BM25_B + BM25_B * docLen / avgdl);
91
- score += effectiveIdf * (numerator / denominator);
92
- }
93
- return score;
94
- }
95
- function searchEngrams(engrams, query, limit = 20) {
96
- const queryTokens = ftsTokenize(query);
97
- if (queryTokens.length === 0) return [];
98
- const idfWeights = computeIdf(engrams, queryTokens);
99
- const avgDocLength = engrams.length > 0 ? engrams.reduce((sum, e) => sum + ftsTokenize(engramSearchText(e)).length, 0) / engrams.length : 0;
100
- return engrams.map((e) => ({ engram: e, score: ftsScore(e, queryTokens, idfWeights, avgDocLength) })).filter((r) => r.score > 0).sort((a, b) => b.score - a.score).slice(0, limit).map((r) => r.engram);
101
- }
102
-
103
- // src/embeddings.ts
104
- import { existsSync as existsSync2, readFileSync, mkdirSync as mkdirSync2 } from "fs";
105
- import { join as join2 } from "path";
106
- import { createHash } from "crypto";
107
-
108
1
  // src/sync.ts
109
2
  import { execFileSync } from "child_process";
110
3
  import { existsSync, writeFileSync, renameSync, mkdirSync, unlinkSync, statSync } from "fs";
@@ -305,93 +198,9 @@ function atomicWrite(filePath, content) {
305
198
  renameSync(tmp, filePath);
306
199
  }
307
200
 
308
- // src/embeddings.ts
309
- var embedPipeline = null;
310
- var transformersUnavailable = false;
311
- async function getEmbedder() {
312
- if (transformersUnavailable) return null;
313
- if (!embedPipeline) {
314
- try {
315
- const { pipeline } = await import("./transformers.node-PH5YK5EA.js");
316
- embedPipeline = await pipeline("feature-extraction", "Xenova/bge-small-en-v1.5", {
317
- dtype: "fp32"
318
- });
319
- } catch {
320
- transformersUnavailable = true;
321
- return null;
322
- }
323
- }
324
- return embedPipeline;
325
- }
326
- async function embed(text) {
327
- const embedder = await getEmbedder();
328
- if (!embedder) return null;
329
- const result = await embedder(text, { pooling: "cls", normalize: true });
330
- return new Float32Array(result.data);
331
- }
332
- function cosineSimilarity(a, b) {
333
- let dot = 0;
334
- for (let i = 0; i < a.length; i++) dot += a[i] * b[i];
335
- return dot;
336
- }
337
- function loadCache(cachePath) {
338
- if (!existsSync2(cachePath)) return {};
339
- try {
340
- return JSON.parse(readFileSync(cachePath, "utf8"));
341
- } catch {
342
- return {};
343
- }
344
- }
345
- function saveCache(cachePath, cache) {
346
- const dir = cachePath.substring(0, cachePath.lastIndexOf("/"));
347
- if (!existsSync2(dir)) mkdirSync2(dir, { recursive: true });
348
- atomicWrite(cachePath, JSON.stringify(cache));
349
- }
350
- function hashStatement(statement) {
351
- return createHash("sha256").update(statement).digest("hex").slice(0, 16);
352
- }
353
- async function embeddingSearch(engrams, query, limit, storagePath) {
354
- if (engrams.length === 0) return [];
355
- const cachePath = storagePath ? join2(storagePath, ".embeddings-cache.json") : ".embeddings-cache.json";
356
- const cache = loadCache(cachePath);
357
- const queryEmbedding = await embed(query);
358
- if (!queryEmbedding) {
359
- return [];
360
- }
361
- const similarities = [];
362
- for (const engram of engrams) {
363
- const searchText = engramSearchText(engram);
364
- const hash = hashStatement(searchText);
365
- let engramEmbedding;
366
- if (cache[engram.id]?.hash === hash) {
367
- engramEmbedding = new Float32Array(cache[engram.id].embedding);
368
- } else {
369
- const emb = await embed(searchText);
370
- if (!emb) return [];
371
- engramEmbedding = emb;
372
- cache[engram.id] = {
373
- hash,
374
- embedding: Array.from(engramEmbedding)
375
- };
376
- }
377
- const score = cosineSimilarity(queryEmbedding, engramEmbedding);
378
- similarities.push({ engram, score });
379
- }
380
- saveCache(cachePath, cache);
381
- similarities.sort((a, b) => b.score - a.score);
382
- return similarities.slice(0, limit).map((s) => s.engram);
383
- }
384
-
385
201
  export {
386
202
  getSyncStatus,
387
203
  sync,
388
204
  withLock,
389
- atomicWrite,
390
- ftsTokenize,
391
- engramSearchText,
392
- ftsScore,
393
- searchEngrams,
394
- embed,
395
- cosineSimilarity,
396
- embeddingSearch
205
+ atomicWrite
397
206
  };