@hbarefoot/engram 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
Binary file
@@ -2,11 +2,11 @@
2
2
  <html lang="en">
3
3
  <head>
4
4
  <meta charset="UTF-8" />
5
- <link rel="icon" type="image/svg+xml" href="/vite.svg" />
5
+ <link rel="icon" type="image/png" href="/favicon.png" />
6
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
7
  <title>Engram Dashboard</title>
8
- <script type="module" crossorigin src="/assets/index-D9QR_Cnu.js"></script>
9
- <link rel="stylesheet" crossorigin href="/assets/index-BHkLa5w_.css">
8
+ <script type="module" crossorigin src="/assets/index-CK-bEXRL.js"></script>
9
+ <link rel="stylesheet" crossorigin href="/assets/index-CIMIyJGP.css">
10
10
  </head>
11
11
  <body>
12
12
  <div id="root"></div>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hbarefoot/engram",
3
- "version": "1.1.0",
3
+ "version": "1.3.0",
4
4
  "description": "Persistent memory for AI agents. SQLite for agent state.",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -30,7 +30,11 @@
30
30
  "pm2:delete": "pm2 delete engram",
31
31
  "pm2:logs": "pm2 logs engram",
32
32
  "pm2:status": "pm2 status engram",
33
- "pm2:monit": "pm2 monit"
33
+ "pm2:monit": "pm2 monit",
34
+ "build:sidecar": "node scripts/build-sidecar.js",
35
+ "desktop:dev": "cd desktop && npm run tauri dev",
36
+ "desktop:build": "npm run build:sidecar && cd desktop && npm run tauri build",
37
+ "desktop:build:universal": "npm run build:sidecar && cd desktop && npm run tauri build -- --target universal-apple-darwin"
34
38
  },
35
39
  "keywords": [
36
40
  "ai",
@@ -64,8 +68,11 @@
64
68
  "@modelcontextprotocol/sdk": "^1.0.0",
65
69
  "@xenova/transformers": "^2.17.0",
66
70
  "better-sqlite3": "^11.0.0",
71
+ "chalk": "^5.6.2",
72
+ "cli-table3": "^0.6.5",
67
73
  "commander": "^12.0.0",
68
74
  "fastify": "^5.0.0",
75
+ "ora": "^8.2.0",
69
76
  "yaml": "^2.4.0"
70
77
  },
71
78
  "devDependencies": {
@@ -12,6 +12,16 @@ const __dirname = path.dirname(__filename);
12
12
  */
13
13
  let cachedPipeline = null;
14
14
 
15
+ /**
16
+ * Whether the pipeline is currently being initialized
17
+ */
18
+ let pipelineLoading = false;
19
+
20
+ /**
21
+ * In-flight initialization promise to prevent concurrent loads
22
+ */
23
+ let initPromise = null;
24
+
15
25
  /**
16
26
  * Model configuration
17
27
  */
@@ -20,6 +30,72 @@ const MODEL_CONFIG = {
20
30
  task: 'feature-extraction'
21
31
  };
22
32
 
33
+ /**
34
+ * Model subdirectory name within a models cache root
35
+ */
36
+ const MODEL_SUBDIR = path.join('Xenova', 'all-MiniLM-L6-v2');
37
+
38
+ /**
39
+ * Build a list of known locations where the embedding model may already be cached.
40
+ * Each entry uses the Xenova cache layout (e.g. .../Xenova/all-MiniLM-L6-v2).
41
+ * @param {Object} [options]
42
+ * @param {boolean} [options.seedable=false] - If true, only return paths with Xenova-compatible layout (safe to cpSync)
43
+ * @returns {string[]} Array of candidate model directory paths
44
+ */
45
+ function getKnownModelSources({ seedable = false } = {}) {
46
+ const sources = [
47
+ // Bundled alongside sidecar (production .app bundle — __dirname is the resources dir)
48
+ path.resolve(__dirname, 'models', MODEL_SUBDIR),
49
+ // node_modules cache (local dev / source)
50
+ path.resolve(__dirname, '../../node_modules/@xenova/transformers/.cache', MODEL_SUBDIR),
51
+ ];
52
+
53
+ // Check TRANSFORMERS_CACHE env var (set during initializePipeline)
54
+ if (process.env.TRANSFORMERS_CACHE) {
55
+ sources.push(path.join(process.env.TRANSFORMERS_CACHE, MODEL_SUBDIR));
56
+ }
57
+
58
+ // HuggingFace Hub cache uses a different layout (models--Xenova--*/snapshots/*)
59
+ // so it's only useful for availability detection, not for seeding via cpSync
60
+ if (!seedable) {
61
+ const homeDir = process.env.HOME || process.env.USERPROFILE;
62
+ if (homeDir) {
63
+ sources.push(path.join(homeDir, '.cache', 'huggingface', 'hub', 'models--Xenova--all-MiniLM-L6-v2'));
64
+ }
65
+ }
66
+
67
+ return sources;
68
+ }
69
+
/**
 * Seed the models cache directory from known cache locations.
 *
 * Copies the first non-empty seedable source into
 * `<modelsPath>/<MODEL_SUBDIR>`. Seeding is best-effort: failures are logged
 * at debug level and the next source is tried; nothing is thrown.
 *
 * A copy that fails part-way is removed again, because a non-empty target is
 * treated as "already seeded" on every later call and a truncated copy would
 * otherwise stick around permanently.
 *
 * @param {string} modelsPath - Target models directory (e.g. ~/.engram/models)
 * @returns {void}
 */
function seedModelCache(modelsPath) {
  const modelSubdir = path.join(modelsPath, MODEL_SUBDIR);

  // Already seeded? Also remember whether we positively verified that the
  // target is absent/empty: only then is it safe to delete it after a failed copy.
  let targetVerifiedEmpty = false;
  try {
    if (fs.existsSync(modelSubdir) && fs.readdirSync(modelSubdir).length > 0) {
      return;
    }
    targetVerifiedEmpty = true;
  } catch { /* could not inspect the target; continue best-effort */ }

  const resolvedTarget = path.resolve(modelSubdir);

  for (const src of getKnownModelSources({ seedable: true })) {
    // A source that IS the target (e.g. TRANSFORMERS_CACHE === modelsPath) cannot seed it
    if (path.resolve(src) === resolvedTarget) {
      continue;
    }

    let copyStarted = false;
    try {
      if (fs.existsSync(src) && fs.readdirSync(src).length > 0) {
        logger.info('Seeding model cache', { from: src, to: modelSubdir });
        fs.mkdirSync(modelSubdir, { recursive: true });
        copyStarted = true;
        fs.cpSync(src, modelSubdir, { recursive: true });
        return;
      }
    } catch (e) {
      logger.debug('Could not seed from source', { src, error: e.message });

      // Roll back a partial copy so the next source (or the next run) starts clean
      if (copyStarted && targetVerifiedEmpty) {
        try {
          fs.rmSync(modelSubdir, { recursive: true, force: true });
        } catch (cleanupError) {
          logger.debug('Could not remove partially seeded model cache', {
            path: modelSubdir,
            error: cleanupError.message
          });
        }
      }
    }
  }
}
98
+
23
99
  /**
24
100
  * Initialize the embedding pipeline
25
101
  * Downloads model on first use and caches it
@@ -32,7 +108,24 @@ export async function initializePipeline(modelsPath) {
32
108
  return cachedPipeline;
33
109
  }
34
110
 
111
+ // If another call is already loading, wait for it instead of starting a second load
112
+ if (initPromise) {
113
+ logger.debug('Pipeline init already in progress, waiting...');
114
+ return initPromise;
115
+ }
116
+
117
+ initPromise = _doInitializePipeline(modelsPath);
35
118
  try {
119
+ return await initPromise;
120
+ } finally {
121
+ initPromise = null;
122
+ }
123
+ }
124
+
125
+ /** @private */
126
+ async function _doInitializePipeline(modelsPath) {
127
+ try {
128
+ pipelineLoading = true;
36
129
  logger.info('Initializing embedding model', { model: MODEL_CONFIG.name });
37
130
 
38
131
  // Ensure models directory exists
@@ -40,6 +133,9 @@ export async function initializePipeline(modelsPath) {
40
133
  fs.mkdirSync(modelsPath, { recursive: true });
41
134
  }
42
135
 
136
+ // Seed cache from known locations if empty
137
+ seedModelCache(modelsPath);
138
+
43
139
  // Set cache directory for transformers
44
140
  process.env.TRANSFORMERS_CACHE = modelsPath;
45
141
 
@@ -60,10 +156,12 @@ export async function initializePipeline(modelsPath) {
60
156
  }
61
157
  );
62
158
 
159
+ pipelineLoading = false;
63
160
  logger.info('Embedding model loaded successfully');
64
161
 
65
162
  return cachedPipeline;
66
163
  } catch (error) {
164
+ pipelineLoading = false;
67
165
  logger.error('Failed to initialize embedding pipeline', { error: error.message });
68
166
  throw error;
69
167
  }
@@ -165,28 +263,28 @@ export function cosineSimilarity(a, b) {
165
263
  * @returns {Object} Object with available flag and actual path
166
264
  */
167
265
  export function isModelAvailable(modelsPath) {
168
- // First check the provided modelsPath
169
- if (fs.existsSync(modelsPath) && fs.readdirSync(modelsPath).length > 0) {
266
+ // If the pipeline is already loaded, the model is definitely available
267
+ if (cachedPipeline) {
170
268
  return { available: true, path: modelsPath };
171
269
  }
172
270
 
173
- // Check Xenova transformers cache in node_modules (most common for local dev)
174
- const possiblePaths = [
175
- path.resolve(__dirname, '../../node_modules/@xenova/transformers/.cache/Xenova/all-MiniLM-L6-v2'),
176
- ];
177
-
178
- // Add home directory cache paths
179
- const homeDir = process.env.HOME || process.env.USERPROFILE;
180
- if (homeDir) {
181
- possiblePaths.push(path.join(homeDir, '.cache', 'huggingface', 'hub', 'models--Xenova--all-MiniLM-L6-v2'));
271
+ // Check the model subdirectory within modelsPath (not just the top-level dir)
272
+ const modelSubdir = path.join(modelsPath, MODEL_SUBDIR);
273
+ try {
274
+ if (fs.existsSync(modelSubdir) && fs.readdirSync(modelSubdir).length > 0) {
275
+ return { available: true, path: modelsPath };
276
+ }
277
+ } catch {
278
+ // Continue checking other paths
182
279
  }
183
280
 
184
- for (const cachePath of possiblePaths) {
281
+ // Check known cache locations
282
+ for (const cachePath of getKnownModelSources()) {
185
283
  try {
186
284
  if (fs.existsSync(cachePath) && fs.readdirSync(cachePath).length > 0) {
187
285
  return { available: true, path: cachePath };
188
286
  }
189
- } catch (e) {
287
+ } catch {
190
288
  // Continue checking other paths
191
289
  }
192
290
  }
@@ -229,11 +327,12 @@ function getDirectorySize(dirPath) {
229
327
  */
230
328
  export function getModelInfo(modelsPath) {
231
329
  const modelCheck = isModelAvailable(modelsPath);
232
- const available = modelCheck.available;
330
+ const pipelineLoaded = cachedPipeline !== null;
331
+ const available = modelCheck.available || pipelineLoaded;
233
332
  const actualPath = modelCheck.path;
234
333
 
235
334
  let size = 0;
236
- if (available) {
335
+ if (modelCheck.available) {
237
336
  size = getDirectorySize(actualPath);
238
337
  }
239
338
 
@@ -241,7 +340,8 @@ export function getModelInfo(modelsPath) {
241
340
  name: MODEL_CONFIG.name,
242
341
  task: MODEL_CONFIG.task,
243
342
  available,
244
- cached: cachedPipeline !== null,
343
+ loading: pipelineLoading,
344
+ cached: pipelineLoaded,
245
345
  sizeBytes: size,
246
346
  sizeMB: Math.round(size / (1024 * 1024)),
247
347
  path: actualPath
@@ -0,0 +1,259 @@
1
+ import * as logger from '../utils/logger.js';
2
+
// Lazy-load parsers to avoid importing unused ones.
// The registry is a frozen, prototype-less map: a lookup such as
// PARSERS['constructor'] yields undefined instead of an inherited function,
// and the set of sources cannot be altered at runtime.
const PARSERS = Object.freeze(Object.assign(Object.create(null), {
  cursorrules: () => import('./parsers/cursorrules.js'),
  claude: () => import('./parsers/claude.js'),
  package: () => import('./parsers/package.js'),
  git: () => import('./parsers/git.js'),
  ssh: () => import('./parsers/ssh.js'),
  shell: () => import('./parsers/shell.js'),
  obsidian: () => import('./parsers/obsidian.js'),
  env: () => import('./parsers/env.js')
}));
14
+
/**
 * Get metadata for all available import sources.
 *
 * Parser modules are independent of each other, so they are loaded in
 * parallel; the result keeps the declaration order of PARSERS. A parser that
 * fails to load is logged as a warning and left out of the result.
 *
 * @returns {Promise<Object[]>} Array of source metadata, each entry being
 *   `{ id, ...parser.meta }`
 */
export async function getSourceMeta() {
  const loaded = await Promise.all(
    Object.entries(PARSERS).map(async ([key, loader]) => {
      try {
        const parser = await loader();
        return { id: key, ...parser.meta };
      } catch (error) {
        logger.warn(`Failed to load parser: ${key}`, { error: error.message });
        return null;
      }
    })
  );

  // Drop the placeholders of parsers that could not be loaded
  return loaded.filter((entry) => entry !== null);
}
36
+
/**
 * Detect which import sources are available on this system.
 *
 * Every parser in PARSERS is loaded and, when it exposes `detect`, asked
 * whether its source is present. The detector result is awaited, so both
 * synchronous and asynchronous detectors are supported. A parser that fails
 * to load or detect is still reported, with placeholder metadata and the
 * error message in `detected.error`.
 *
 * @param {Object} [options] - Detection options (passed through to `detect`)
 * @param {string} [options.cwd] - Working directory
 * @returns {Promise<Object[]>} Array of `{ id, ...parser.meta, detected }`
 *   entries; parsers without `detect` report `{ found: false, path: null }`
 */
export async function detectSources(options = {}) {
  const results = [];

  for (const [key, loader] of Object.entries(PARSERS)) {
    try {
      const parser = await loader();
      // await is a no-op for plain values and unwraps promise-returning detectors
      const detection = parser.detect
        ? await parser.detect(options)
        : { found: false, path: null };

      results.push({
        id: key,
        ...parser.meta,
        detected: detection
      });
    } catch (error) {
      logger.warn(`Failed to detect source: ${key}`, { error: error.message });
      results.push({
        id: key,
        name: key,
        label: key,
        description: 'Failed to load parser',
        detected: { found: false, path: null, error: error.message }
      });
    }
  }

  return results;
}
70
+
/**
 * Scan selected sources and extract candidate memories.
 *
 * Sources are scanned one after another. An id that is not an own key of
 * PARSERS produces an "Unknown source" warning; a parser that throws produces
 * a "Scan failed" warning and a zeroed entry in `sources`. Neither aborts the
 * scan of the remaining sources.
 *
 * @param {string[]} sourceIds - Array of source IDs to scan
 * @param {Object} [options] - Scan options (passed through to `parse`)
 * @param {string} [options.cwd] - Working directory
 * @returns {Promise<Object>} Scan results:
 *   `{ memories, skipped, warnings, sources, duration }` where `sources` maps
 *   each scanned id to `{ memoriesCount, skippedCount, warnings }` and
 *   `duration` is in milliseconds
 */
export async function scanSources(sourceIds, options = {}) {
  const startTime = Date.now();
  const allMemories = [];
  const allSkipped = [];
  const allWarnings = [];
  const sourceResults = {};

  for (const sourceId of sourceIds) {
    // Own-property check: ids such as "constructor" must not resolve to
    // functions inherited from Object.prototype
    const loader = Object.prototype.hasOwnProperty.call(PARSERS, sourceId)
      ? PARSERS[sourceId]
      : undefined;
    if (typeof loader !== 'function') {
      allWarnings.push(`Unknown source: ${sourceId}`);
      continue;
    }

    try {
      const parser = await loader();
      const result = (await parser.parse(options)) ?? {};

      // Tolerate parsers that omit a list they have nothing to report in
      const memories = result.memories ?? [];
      const skipped = result.skipped ?? [];
      const warnings = result.warnings ?? [];

      sourceResults[sourceId] = {
        memoriesCount: memories.length,
        skippedCount: skipped.length,
        warnings
      };

      // Element-wise appends: push(...list) can exceed the engine's argument
      // limit when a source yields a very large list
      for (const memory of memories) {
        allMemories.push(memory);
      }
      for (const entry of skipped) {
        allSkipped.push({ ...entry, source: sourceId });
      }
      for (const warning of warnings) {
        allWarnings.push(`[${sourceId}] ${warning}`);
      }

      logger.info(`Scanned source: ${sourceId}`, {
        memories: memories.length,
        skipped: skipped.length
      });
    } catch (error) {
      logger.error(`Failed to scan source: ${sourceId}`, { error: error.message });
      allWarnings.push(`[${sourceId}] Scan failed: ${error.message}`);
      sourceResults[sourceId] = { memoriesCount: 0, skippedCount: 0, warnings: [error.message] };
    }
  }

  return {
    memories: allMemories,
    skipped: allSkipped,
    warnings: allWarnings,
    sources: sourceResults,
    duration: Date.now() - startTime
  };
}
125
+
/**
 * Commit scanned memories to the database.
 *
 * Each candidate is validated, optionally embedded, and handed to the commit
 * function. One failing candidate never aborts the batch: it is recorded in
 * `errors` and processing continues with the next one. Validation, embedding
 * and commit functions may be synchronous or asynchronous.
 *
 * Defaults are loaded lazily and only when the corresponding function was not
 * injected: `createMemoryWithDedup` from the memory store, `generateEmbedding`
 * from the embed module (skipped with a warning if it cannot be loaded) and
 * `validateContent` (called with `autoRedact: true`) from the secrets module.
 *
 * @param {Object} db - SQLite database instance
 * @param {Object[]} memories - Memory candidates to commit
 * @param {Object} [options] - Commit options
 * @param {string} [options.namespace] - Override namespace for all memories
 * @param {Function} [options.createMemoryFn] - Custom createMemory function,
 *   called as `(db, memoryData)`; expected to return `{ status, id }`
 * @param {Function} [options.generateEmbeddingFn] - Custom embedding function,
 *   called as `(content)`
 * @param {Function} [options.validateContentFn] - Custom validation function,
 *   called as `(content)`; expected to return `{ valid, content }`
 * @returns {Promise<Object>} Commit results:
 *   `{ created, duplicates, merged, rejected, errors, details, duration, total }`
 */
export async function commitMemories(db, memories, options = {}) {
  const {
    namespace,
    createMemoryFn,
    generateEmbeddingFn,
    validateContentFn
  } = options;

  const startTime = Date.now();
  const results = {
    created: 0,
    duplicates: 0,
    merged: 0,
    rejected: 0,
    errors: [],
    details: []
  };

  // Load the default store only when no commit function was injected
  let commitFn = createMemoryFn;
  if (!commitFn) {
    const { createMemoryWithDedup } = await import('../memory/store.js');
    commitFn = createMemoryWithDedup;
  }

  // Embeddings are optional: if the default cannot be loaded, commit without them
  let embedFn = generateEmbeddingFn;
  if (!embedFn) {
    try {
      const { generateEmbedding } = await import('../embed/index.js');
      const { loadConfig, getModelsPath } = await import('../config/index.js');
      const modelsPath = getModelsPath(loadConfig());
      embedFn = (content) => generateEmbedding(content, modelsPath);
    } catch (error) {
      logger.warn('Embeddings not available, committing without embeddings', { error: error.message });
    }
  }

  // Validation is mandatory; fall back to the built-in secret detection
  let validateFn = validateContentFn;
  if (!validateFn) {
    const { validateContent } = await import('../extract/secrets.js');
    validateFn = (content) => validateContent(content, { autoRedact: true });
  }

  // Short preview for reports. Must never throw: it is also used from the
  // error handler, where the candidate may be malformed (no/non-string content).
  const preview = (memory) => String(memory?.content ?? '').substring(0, 50);

  let embeddingFailures = 0;

  for (const memory of memories) {
    try {
      // Final secret validation
      const validation = await validateFn(memory.content);
      if (!validation.valid) {
        results.rejected++;
        // NOTE(review): this preview is taken from the unvalidated content and
        // may therefore contain the very text that failed secret detection.
        results.details.push({
          content: preview(memory),
          status: 'rejected',
          reason: 'Failed secret detection'
        });
        continue;
      }

      // Validators that do not return content leave the original text in place
      const content = validation.content ?? memory.content;

      // Generate embedding (best-effort: a failure only costs the embedding)
      let embedding = null;
      if (embedFn) {
        try {
          embedding = await embedFn(content);
        } catch {
          embeddingFailures++;
        }
      }

      const memoryData = {
        content,
        category: memory.category || 'fact',
        entity: memory.entity || null,
        // ?? rather than ||: an explicit confidence of 0 is a real value
        confidence: memory.confidence ?? 0.8,
        namespace: namespace || memory.namespace || 'default',
        tags: memory.tags || [],
        source: memory.source || 'import',
        embedding
      };

      // Awaited so that async commit functions report their real status and
      // their rejections land in the catch block below
      const commitResult = await commitFn(db, memoryData);

      switch (commitResult.status) {
        case 'duplicate':
          results.duplicates++;
          break;
        case 'merged':
          results.merged++;
          break;
        case 'created':
        default:
          // Unrecognized statuses are counted as created
          results.created++;
      }

      results.details.push({
        content: preview(memory),
        status: commitResult.status,
        id: commitResult.id
      });
    } catch (error) {
      results.errors.push({
        content: preview(memory),
        error: error.message
      });
    }
  }

  if (embeddingFailures > 0) {
    logger.warn('Some memories were committed without embeddings', { count: embeddingFailures });
  }

  results.duration = Date.now() - startTime;
  results.total = memories.length;

  logger.info('Import commit complete', {
    total: results.total,
    created: results.created,
    duplicates: results.duplicates,
    merged: results.merged,
    rejected: results.rejected,
    errors: results.errors.length,
    duration: results.duration
  });

  return results;
}