@plur-ai/core 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -62,32 +62,42 @@ function computeIdf(engrams, queryTokens) {
62
62
  }
63
63
  return idf;
64
64
  }
65
- function ftsScore(engram, queryTokens, idfWeights) {
65
+ var BM25_K1 = 1.2;
66
+ var BM25_B = 0.75;
67
+ function ftsScore(engram, queryTokens, idfWeights, avgDocLength) {
66
68
  const allTerms = ftsTokenize(engramSearchText(engram));
67
69
  if (queryTokens.length === 0) return 0;
68
- let weightedHits = 0;
69
- let totalWeight = 0;
70
+ const docLen = allTerms.length;
71
+ const avgdl = avgDocLength && avgDocLength > 0 ? avgDocLength : docLen;
72
+ const hasNonZeroIdf = idfWeights && Array.from(idfWeights.values()).some((v) => v > 0);
73
+ let score = 0;
70
74
  for (const qt of queryTokens) {
71
- const weight = idfWeights?.get(qt) ?? 1;
72
- totalWeight += weight;
73
- if (allTerms.some((t) => t.includes(qt) || qt.includes(t))) {
74
- weightedHits += weight;
75
+ let effectiveIdf;
76
+ if (!idfWeights) {
77
+ effectiveIdf = 1;
78
+ } else if (hasNonZeroIdf) {
79
+ effectiveIdf = idfWeights.get(qt) ?? 0;
80
+ if (effectiveIdf === 0) continue;
81
+ } else {
82
+ effectiveIdf = 1;
75
83
  }
76
- }
77
- if (totalWeight === 0) {
78
- let matches = 0;
79
- for (const qt of queryTokens) {
80
- if (allTerms.some((t) => t.includes(qt) || qt.includes(t))) matches++;
84
+ let tf = 0;
85
+ for (const t of allTerms) {
86
+ if (t.includes(qt) || qt.includes(t)) tf++;
81
87
  }
82
- return matches / queryTokens.length;
88
+ if (tf === 0) continue;
89
+ const numerator = tf * (BM25_K1 + 1);
90
+ const denominator = tf + BM25_K1 * (1 - BM25_B + BM25_B * docLen / avgdl);
91
+ score += effectiveIdf * (numerator / denominator);
83
92
  }
84
- return weightedHits / totalWeight;
93
+ return score;
85
94
  }
86
95
  function searchEngrams(engrams, query, limit = 20) {
87
96
  const queryTokens = ftsTokenize(query);
88
97
  if (queryTokens.length === 0) return [];
89
98
  const idfWeights = computeIdf(engrams, queryTokens);
90
- return engrams.map((e) => ({ engram: e, score: ftsScore(e, queryTokens, idfWeights) })).filter((r) => r.score > 0).sort((a, b) => b.score - a.score).slice(0, limit).map((r) => r.engram);
99
+ const avgDocLength = engrams.length > 0 ? engrams.reduce((sum, e) => sum + ftsTokenize(engramSearchText(e)).length, 0) / engrams.length : 0;
100
+ return engrams.map((e) => ({ engram: e, score: ftsScore(e, queryTokens, idfWeights, avgDocLength) })).filter((r) => r.score > 0).sort((a, b) => b.score - a.score).slice(0, limit).map((r) => r.engram);
91
101
  }
92
102
 
93
103
  // src/embeddings.ts
@@ -97,7 +107,7 @@ import { createHash } from "crypto";
97
107
 
98
108
  // src/sync.ts
99
109
  import { execFileSync } from "child_process";
100
- import { existsSync, writeFileSync, renameSync, mkdirSync } from "fs";
110
+ import { existsSync, writeFileSync, renameSync, mkdirSync, unlinkSync, statSync } from "fs";
101
111
  import { join, dirname } from "path";
102
112
  var GITIGNORE = `# PLUR \u2014 derived/cache files (regenerated automatically)
103
113
  embeddings/
@@ -249,6 +259,44 @@ function sync(root, remote) {
249
259
  files_changed: filesChanged
250
260
  };
251
261
  }
262
+ function withLock(filePath, fn, options) {
263
+ const lockPath = filePath + ".lock";
264
+ const maxRetries = options?.maxRetries ?? 5;
265
+ const baseDelay = options?.baseDelay ?? 100;
266
+ const staleThreshold = options?.staleThreshold ?? 1e4;
267
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
268
+ try {
269
+ writeFileSync(lockPath, `${process.pid}`, { flag: "wx" });
270
+ break;
271
+ } catch (err) {
272
+ if (err.code !== "EEXIST") throw err;
273
+ try {
274
+ const stat = statSync(lockPath);
275
+ if (Date.now() - stat.mtimeMs > staleThreshold) {
276
+ unlinkSync(lockPath);
277
+ continue;
278
+ }
279
+ } catch {
280
+ continue;
281
+ }
282
+ if (attempt === maxRetries) {
283
+ throw new Error(`Failed to acquire lock on ${filePath} after ${maxRetries} retries`);
284
+ }
285
+ const delay = baseDelay * Math.pow(2, attempt);
286
+ const end = Date.now() + delay;
287
+ while (Date.now() < end) {
288
+ }
289
+ }
290
+ }
291
+ try {
292
+ return fn();
293
+ } finally {
294
+ try {
295
+ unlinkSync(lockPath);
296
+ } catch {
297
+ }
298
+ }
299
+ }
252
300
  function atomicWrite(filePath, content) {
253
301
  const dir = dirname(filePath);
254
302
  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
@@ -337,6 +385,7 @@ async function embeddingSearch(engrams, query, limit, storagePath) {
337
385
  export {
338
386
  getSyncStatus,
339
387
  sync,
388
+ withLock,
340
389
  atomicWrite,
341
390
  ftsTokenize,
342
391
  engramSearchText,
@@ -2,7 +2,7 @@ import {
2
2
  cosineSimilarity,
3
3
  embed,
4
4
  embeddingSearch
5
- } from "./chunk-WPD4MPTT.js";
5
+ } from "./chunk-KMVQYBNP.js";
6
6
  import "./chunk-2ZDO52B4.js";
7
7
  export {
8
8
  cosineSimilarity,
package/dist/index.d.ts CHANGED
@@ -616,6 +616,7 @@ declare const PlurConfigSchema: z.ZodObject<{
616
616
  co_access?: boolean | undefined;
617
617
  }>>>;
618
618
  allow_secrets: z.ZodOptional<z.ZodDefault<z.ZodBoolean>>;
619
+ index: z.ZodOptional<z.ZodDefault<z.ZodBoolean>>;
619
620
  }, "strip", z.ZodTypeAny, {
620
621
  auto_learn?: boolean | undefined;
621
622
  auto_capture?: boolean | undefined;
@@ -629,6 +630,7 @@ declare const PlurConfigSchema: z.ZodObject<{
629
630
  co_access: boolean;
630
631
  } | undefined;
631
632
  allow_secrets?: boolean | undefined;
633
+ index?: boolean | undefined;
632
634
  }, {
633
635
  auto_learn?: boolean | undefined;
634
636
  auto_capture?: boolean | undefined;
@@ -642,6 +644,7 @@ declare const PlurConfigSchema: z.ZodObject<{
642
644
  co_access?: boolean | undefined;
643
645
  } | undefined;
644
646
  allow_secrets?: boolean | undefined;
647
+ index?: boolean | undefined;
645
648
  }>;
646
649
  type PlurConfig = z.infer<typeof PlurConfigSchema>;
647
650
 
@@ -1164,9 +1167,36 @@ interface PlurPaths {
1164
1167
  packs: string;
1165
1168
  exchange: string;
1166
1169
  config: string;
1170
+ db: string;
1167
1171
  }
1168
1172
  declare function detectPlurStorage(explicitPath?: string): PlurPaths;
1169
1173
 
1174
+ declare class IndexedStorage {
1175
+ private dbPath;
1176
+ private engramsPath;
1177
+ private db;
1178
+ constructor(engramsPath: string, dbPath: string);
1179
+ private getDb;
1180
+ /** Load all engrams from SQLite index. Auto-rebuilds if db missing. */
1181
+ loadAll(): Engram[];
1182
+ /** Load engrams with SQL-level filtering. */
1183
+ loadFiltered(filter: {
1184
+ status?: string;
1185
+ scope?: string;
1186
+ domain?: string;
1187
+ }): Engram[];
1188
+ /** Count engrams with optional status filter. */
1189
+ count(filter?: {
1190
+ status?: string;
1191
+ }): number;
1192
+ /** Sync SQLite index from YAML source of truth. */
1193
+ syncFromYaml(): void;
1194
+ /** Drop and rebuild the entire index from YAML. */
1195
+ reindex(): void;
1196
+ /** Close the database connection. */
1197
+ close(): void;
1198
+ }
1199
+
1170
1200
  /**
1171
1201
  * Non-blocking version check against npm registry.
1172
1202
  * Caches result in memory — one fetch per process lifetime.
@@ -1212,6 +1242,7 @@ interface StatusResult {
1212
1242
  declare class Plur {
1213
1243
  private paths;
1214
1244
  private config;
1245
+ private indexedStorage;
1215
1246
  constructor(options?: {
1216
1247
  path?: string;
1217
1248
  });
@@ -1263,6 +1294,15 @@ declare class Plur {
1263
1294
  updateEngram(updated: Engram): boolean;
1264
1295
  /** Set engram status to 'retired'. */
1265
1296
  forget(id: string, reason?: string): void;
1297
+ /** Remove retired engrams from storage. Returns count of removed and remaining. */
1298
+ compact(): {
1299
+ removed: number;
1300
+ remaining: number;
1301
+ };
1302
+ /** Rebuild SQLite index from YAML source of truth. Only works when index: true. */
1303
+ reindex(): void;
1304
+ /** Sync SQLite index after YAML write (no-op if index disabled) */
1305
+ private _syncIndex;
1266
1306
  /** Capture an episodic memory. */
1267
1307
  capture(summary: string, context?: CaptureContext): Episode;
1268
1308
  /** Query the episode timeline. */
@@ -1294,4 +1334,4 @@ declare class Plur {
1294
1334
  status(): StatusResult;
1295
1335
  }
1296
1336
 
1297
- export { type AlignmentResult, type Association, type CaptureContext, type DomainCoverage, DomainCoverageSchema, type Engram, type EngramCluster, type Episode, type EvidenceEntry, EvidenceEntrySchema, type ExtractOptions, type ExtractionResult, type Falsification, FalsificationSchema, type HierarchyPosition, HierarchyPositionSchema, type IngestCandidate, type IngestOptions, type InjectOptions, type InjectionResult, type KnowledgeAnchor, type LearnContext, type LlmFunction, type MemberAlignment, type MetaConfidence, MetaConfidenceSchema, type MetaField, MetaFieldSchema, PLATITUDE_PATTERNS, type PackManifest, Plur, type PlurConfig, type PlurPaths, type RecallOptions, type RelationalAnalysis, type RelationalTriple, SessionBreadcrumbs, type StatusResult, type StructuralTemplate, StructuralTemplateSchema, type SyncResult, type SyncStatus, type TimelineQuery, type TypedRole, type ValidationResult, type VersionCheckResult, alignCluster, analyzeStructure, checkForUpdate, classifyPolarity, clearVersionCache, clusterByStructure, computeConfidence, computeMetaConfidence, confidenceBand, detectPlurStorage, detectSecrets, engramSearchText, extractMetaEngrams, formulateMetaEngram, generateGuardrails, getCachedUpdateCheck, isPlatitude, organizeHierarchy, tokenSimilarity, validateMetaEngram };
1337
+ export { type AlignmentResult, type Association, type CaptureContext, type DomainCoverage, DomainCoverageSchema, type Engram, type EngramCluster, type Episode, type EvidenceEntry, EvidenceEntrySchema, type ExtractOptions, type ExtractionResult, type Falsification, FalsificationSchema, type HierarchyPosition, HierarchyPositionSchema, IndexedStorage, type IngestCandidate, type IngestOptions, type InjectOptions, type InjectionResult, type KnowledgeAnchor, type LearnContext, type LlmFunction, type MemberAlignment, type MetaConfidence, MetaConfidenceSchema, type MetaField, MetaFieldSchema, PLATITUDE_PATTERNS, type PackManifest, Plur, type PlurConfig, type PlurPaths, type RecallOptions, type RelationalAnalysis, type RelationalTriple, SessionBreadcrumbs, type StatusResult, type StructuralTemplate, StructuralTemplateSchema, type SyncResult, type SyncStatus, type TimelineQuery, type TypedRole, type ValidationResult, type VersionCheckResult, alignCluster, analyzeStructure, checkForUpdate, classifyPolarity, clearVersionCache, clusterByStructure, computeConfidence, computeMetaConfidence, confidenceBand, detectPlurStorage, detectSecrets, engramSearchText, extractMetaEngrams, formulateMetaEngram, generateGuardrails, getCachedUpdateCheck, isPlatitude, organizeHierarchy, tokenSimilarity, validateMetaEngram };