@yamo/memory-mesh 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +80 -0
- package/bin/memory_mesh.js +69 -0
- package/bin/scrubber.js +81 -0
- package/index.d.ts +111 -0
- package/lib/adapters/index.js +3 -0
- package/lib/embeddings/factory.js +150 -0
- package/lib/embeddings/index.js +2 -0
- package/lib/embeddings/service.js +586 -0
- package/lib/index.js +18 -0
- package/lib/lancedb/client.js +631 -0
- package/lib/lancedb/config.js +215 -0
- package/lib/lancedb/errors.js +144 -0
- package/lib/lancedb/index.js +4 -0
- package/lib/lancedb/schema.js +197 -0
- package/lib/memory/index.js +3 -0
- package/lib/memory/memory-context-manager.js +388 -0
- package/lib/memory/memory-mesh.js +910 -0
- package/lib/memory/memory-translator.js +130 -0
- package/lib/memory/migrate-memory.js +227 -0
- package/lib/memory/migrate-to-v2.js +120 -0
- package/lib/memory/scorer.js +85 -0
- package/lib/memory/vector-memory.js +364 -0
- package/lib/privacy/audit-logger.js +176 -0
- package/lib/privacy/dlp-redactor.js +72 -0
- package/lib/privacy/index.js +10 -0
- package/lib/reporting/skill-report-generator.js +283 -0
- package/lib/scrubber/.gitkeep +1 -0
- package/lib/scrubber/config/defaults.js +62 -0
- package/lib/scrubber/errors/scrubber-error.js +43 -0
- package/lib/scrubber/index.js +25 -0
- package/lib/scrubber/scrubber.js +130 -0
- package/lib/scrubber/stages/chunker.js +103 -0
- package/lib/scrubber/stages/metadata-annotator.js +74 -0
- package/lib/scrubber/stages/normalizer.js +59 -0
- package/lib/scrubber/stages/semantic-filter.js +61 -0
- package/lib/scrubber/stages/structural-cleaner.js +82 -0
- package/lib/scrubber/stages/validator.js +66 -0
- package/lib/scrubber/telemetry.js +66 -0
- package/lib/scrubber/utils/hash.js +39 -0
- package/lib/scrubber/utils/html-parser.js +45 -0
- package/lib/scrubber/utils/pattern-matcher.js +63 -0
- package/lib/scrubber/utils/token-counter.js +31 -0
- package/lib/search/filter.js +275 -0
- package/lib/search/hybrid.js +137 -0
- package/lib/search/index.js +3 -0
- package/lib/search/pattern-miner.js +160 -0
- package/lib/utils/error-sanitizer.js +84 -0
- package/lib/utils/handoff-validator.js +85 -0
- package/lib/utils/index.js +4 -0
- package/lib/utils/spinner.js +190 -0
- package/lib/utils/streaming-client.js +128 -0
- package/package.json +39 -0
- package/skills/SKILL.md +462 -0
- package/skills/skill-scrubber.yamo +41 -0
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LanceDB Configuration Loader
|
|
3
|
+
* Loads and validates configuration from environment variables
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import path from "path";
|
|
7
|
+
|
|
8
|
+
/**
 * Built-in fallback values for the settings read by loadConfig().
 *
 * Every key doubles as the name of the environment variable that overrides
 * it, which is why all values (numbers and booleans included) are strings.
 */
const DEFAULTS = {
  // ---- LanceDB storage ----
  LANCEDB_URI: './runtime/data/lancedb',
  LANCEDB_MEMORY_TABLE: 'memory_entries',
  LANCEDB_MAX_CACHE_SIZE: '2GB',

  // ---- Embedding model ----
  EMBEDDING_MODEL_TYPE: 'local',
  EMBEDDING_MODEL_NAME: 'Xenova/all-MiniLM-L6-v2',
  EMBEDDING_DIMENSION: '384',
  EMBEDDING_BATCH_SIZE: '32',
  EMBEDDING_NORMALIZE: 'true',

  // ---- API-based embeddings ----
  OPENAI_EMBEDDING_MODEL: 'text-embedding-3-small',

  // ---- Search ----
  DEFAULT_TOP_K: '10',
  DEFAULT_SIMILARITY_THRESHOLD: '0.7',
  ENABLE_HYBRID_SEARCH: 'true',
  HYBRID_SEARCH_ALPHA: '0.5',

  // ---- Performance tuning ----
  VECTOR_INDEX_TYPE: 'ivf_pq',
  IVF_PARTITIONS: '256',
  PQ_BITS: '8',
  ENABLE_QUERY_CACHE: 'true',
  QUERY_CACHE_TTL: '300'
};
|
|
40
|
+
|
|
41
|
+
/**
 * Default values for the memory subsystem, keyed by environment-variable
 * name. Values are strings so they have the same shape as entries read from
 * `process.env`.
 */
const MEMORY_DEFAULTS = {
  // ---- Feature flags ----
  MEMORY_ENABLED: 'true',
  MEMORY_AUTO_CAPTURE: 'true',
  MEMORY_AUTO_RECALL: 'true',

  // ---- Recall ----
  MEMORY_MAX_CONTEXT: '5',
  MEMORY_RELEVANCE_THRESHOLD: '0.7',
  MEMORY_IMPORTANCE_BOOST: '1.5',
  MEMORY_RECENCY_WEIGHT: '0.3',

  // ---- Capture ----
  MEMORY_MIN_IMPORTANCE: '0.3',
  MEMORY_DEDUP_THRESHOLD: '0.9',
  MEMORY_CAPTURE_TOOL_RESULTS: 'true',
  MEMORY_CAPTURE_FILE_OPS: 'true',

  // ---- Retention ----
  MEMORY_RETENTION_ENABLED: 'true',
  MEMORY_RETENTION_DAYS: '90',
  MEMORY_MAX_PER_SESSION: '100',
  MEMORY_MIN_IMPORTANCE_TO_KEEP: '0.5',

  // ---- Privacy (both off unless explicitly enabled) ----
  MEMORY_REDACT_PII: 'false',
  MEMORY_ENCRYPTION_ENABLED: 'false',
};
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
/**
 * Build the LanceDB configuration from the environment.
 *
 * For every key in DEFAULTS the same-named environment variable wins when it
 * is set to a non-empty string; otherwise the default is used. No validation
 * happens here.
 *
 * @returns {Object} Map of setting name to string value
 */
function loadConfig() {
  const settings = Object.fromEntries(
    Object.keys(DEFAULTS).map((name) => [name, process.env[name] || DEFAULTS[name]])
  );

  // Only explicitly dot-relative URIs are anchored to the working directory;
  // anything else is passed through untouched.
  const uri = settings.LANCEDB_URI;
  const isDotRelative = ['./', '../'].some((prefix) => uri.startsWith(prefix));
  if (isDotRelative) {
    settings.LANCEDB_URI = path.resolve(process.cwd(), uri);
  }

  return settings;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Load memory-specific configuration from environment variables.
 *
 * Flags under `privacy` are opt-in (only the literal string 'true' enables
 * them); every other flag is on unless the variable is exactly 'false'.
 * Numeric settings fall back to their default when the variable is unset,
 * empty, or cannot be parsed as a number, so callers never receive NaN.
 *
 * @returns {Object} Memory configuration object
 */
function loadMemoryConfig() {
  const env = process.env;

  // Default-on flag: anything except the literal 'false' keeps it enabled.
  const onUnlessFalse = (name) => env[name] !== 'false';
  // Opt-in flag: only the literal 'true' enables it.
  const onOnlyIfTrue = (name) => env[name] === 'true';

  // Base-10 integer with a fallback for unset/empty/garbage values.
  const intSetting = (name, fallback) => {
    const parsed = Number.parseInt(env[name] || '', 10);
    return Number.isNaN(parsed) ? fallback : parsed;
  };
  // Float with the same fallback rule.
  const floatSetting = (name, fallback) => {
    const parsed = Number.parseFloat(env[name] || '');
    return Number.isNaN(parsed) ? fallback : parsed;
  };

  return {
    enabled: onUnlessFalse('MEMORY_ENABLED'),
    autoCapture: onUnlessFalse('MEMORY_AUTO_CAPTURE'),
    autoRecall: onUnlessFalse('MEMORY_AUTO_RECALL'),
    maxContext: intSetting('MEMORY_MAX_CONTEXT', 5),
    relevanceThreshold: floatSetting('MEMORY_RELEVANCE_THRESHOLD', 0.7),
    importanceBoost: floatSetting('MEMORY_IMPORTANCE_BOOST', 1.5),
    recencyWeight: floatSetting('MEMORY_RECENCY_WEIGHT', 0.3),
    minImportance: floatSetting('MEMORY_MIN_IMPORTANCE', 0.3),
    dedupThreshold: floatSetting('MEMORY_DEDUP_THRESHOLD', 0.9),
    captureToolResults: onUnlessFalse('MEMORY_CAPTURE_TOOL_RESULTS'),
    captureFileOps: onUnlessFalse('MEMORY_CAPTURE_FILE_OPS'),
    retention: {
      enabled: onUnlessFalse('MEMORY_RETENTION_ENABLED'),
      days: intSetting('MEMORY_RETENTION_DAYS', 90),
      maxPerSession: intSetting('MEMORY_MAX_PER_SESSION', 100),
      minImportanceToKeep: floatSetting('MEMORY_MIN_IMPORTANCE_TO_KEEP', 0.5),
    },
    privacy: {
      redactPii: onOnlyIfTrue('MEMORY_REDACT_PII'),
      encryptionEnabled: onOnlyIfTrue('MEMORY_ENCRYPTION_ENABLED'),
    },
  };
}
|
|
122
|
+
|
|
123
|
+
/**
 * Validate a configuration object of the shape produced by loadConfig().
 *
 * Problems are collected rather than thrown, so a caller sees every invalid
 * setting at once. A missing or non-string setting is reported as invalid
 * instead of raising a TypeError.
 *
 * @param {Object} config - Map of setting name to (string) value
 * @returns {string[]} Human-readable error messages; empty when valid
 */
function validateConfig(config) {
  const errors = [];

  // Embedding backend must be one of the supported kinds.
  const validModelTypes = ['local', 'openai', 'cohere', 'voyage'];
  if (!validModelTypes.includes(config.EMBEDDING_MODEL_TYPE)) {
    errors.push(`Invalid EMBEDDING_MODEL_TYPE: ${config.EMBEDDING_MODEL_TYPE}`);
  }

  // Strictly positive base-10 integers that echo the offending value.
  const dimension = Number.parseInt(config.EMBEDDING_DIMENSION, 10);
  if (Number.isNaN(dimension) || dimension <= 0) {
    errors.push(`Invalid EMBEDDING_DIMENSION: ${config.EMBEDDING_DIMENSION}`);
  }

  const topK = Number.parseInt(config.DEFAULT_TOP_K, 10);
  if (Number.isNaN(topK) || topK <= 0) {
    errors.push(`Invalid DEFAULT_TOP_K: ${config.DEFAULT_TOP_K}`);
  }

  // Boolean-like strings, compared case-insensitively. String(... ?? '')
  // keeps an absent or non-string value from crashing the validator.
  const boolFields = ['EMBEDDING_NORMALIZE', 'ENABLE_HYBRID_SEARCH', 'ENABLE_QUERY_CACHE'];
  for (const field of boolFields) {
    const raw = config[field];
    const value = String(raw === undefined || raw === null ? '' : raw).toLowerCase();
    if (value !== 'true' && value !== 'false') {
      errors.push(`Invalid ${field}: must be 'true' or 'false'`);
    }
  }

  // Similarity threshold must lie in [0, 1].
  const threshold = Number.parseFloat(config.DEFAULT_SIMILARITY_THRESHOLD);
  if (Number.isNaN(threshold) || threshold < 0 || threshold > 1) {
    errors.push(`Invalid DEFAULT_SIMILARITY_THRESHOLD: must be between 0 and 1`);
  }

  // Hybrid search alpha must lie in [0, 1].
  const alpha = Number.parseFloat(config.HYBRID_SEARCH_ALPHA);
  if (Number.isNaN(alpha) || alpha < 0 || alpha > 1) {
    errors.push(`Invalid HYBRID_SEARCH_ALPHA: must be between 0 and 1`);
  }

  // Remaining settings only need to be strictly positive integers.
  const positiveIntFields = ['EMBEDDING_BATCH_SIZE', 'IVF_PARTITIONS', 'PQ_BITS', 'QUERY_CACHE_TTL'];
  for (const field of positiveIntFields) {
    const value = Number.parseInt(config[field], 10);
    if (Number.isNaN(value) || value <= 0) {
      errors.push(`Invalid ${field}: must be a positive integer`);
    }
  }

  // Cache size is a number followed by an upper-case unit, e.g. "2GB", "500MB".
  const cacheSizePattern = /^\d+(\.\d+)?(KB|MB|GB|TB)$/;
  if (!cacheSizePattern.test(config.LANCEDB_MAX_CACHE_SIZE)) {
    errors.push(`Invalid LANCEDB_MAX_CACHE_SIZE: must match pattern like "2GB", "500MB"`);
  }

  return errors;
}
|
|
184
|
+
|
|
185
|
+
/**
 * Load the configuration and fail fast when it is invalid.
 *
 * @returns {Object} The configuration produced by loadConfig()
 * @throws {Error} When validateConfig() reports problems; the message lists
 *   every problem on its own line
 */
function getConfig() {
  const config = loadConfig();
  const problems = validateConfig(config);

  if (problems.length > 0) {
    const report = problems.join('\n');
    throw new Error(`Configuration validation failed:\n${report}`);
  }

  return config;
}
|
|
198
|
+
|
|
199
|
+
export {
|
|
200
|
+
loadConfig,
|
|
201
|
+
validateConfig,
|
|
202
|
+
getConfig,
|
|
203
|
+
loadMemoryConfig,
|
|
204
|
+
DEFAULTS,
|
|
205
|
+
MEMORY_DEFAULTS,
|
|
206
|
+
};
|
|
207
|
+
|
|
208
|
+
export default {
|
|
209
|
+
loadConfig,
|
|
210
|
+
validateConfig,
|
|
211
|
+
getConfig,
|
|
212
|
+
loadMemoryConfig,
|
|
213
|
+
DEFAULTS,
|
|
214
|
+
MEMORY_DEFAULTS,
|
|
215
|
+
};
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/**
 * Custom error classes for LanceDB operations.
 *
 * LanceDBError is the base class for all LanceDB-related errors. It carries a
 * machine-readable code, free-form details, and the creation time alongside
 * the usual message and stack.
 */
class LanceDBError extends Error {
  /**
   * Create a new LanceDBError
   * @param {string} message - Human-readable error message
   * @param {string} code - Machine-readable error code (e.g., 'EMBEDDING_ERROR')
   * @param {Object} [details={}] - Additional error context and metadata
   */
  constructor(message, code, details = {}) {
    super(message);
    this.name = 'LanceDBError';
    this.code = code;
    this.details = details;
    this.timestamp = new Date().toISOString();

    // Trim the constructor frames from the stack so it starts at the throw
    // site. captureStackTrace is a V8 extension, so it is feature-checked:
    // on runtimes without it the stack produced by super() is kept as-is
    // instead of the constructor itself throwing a TypeError.
    if (typeof Error.captureStackTrace === 'function') {
      Error.captureStackTrace(this, this.constructor);
    }
  }
}
|
|
28
|
+
|
|
29
|
+
/**
 * Raised when generating or comparing embeddings fails.
 * Always carries the error code 'EMBEDDING_ERROR'.
 */
class EmbeddingError extends LanceDBError {
  /**
   * @param {string} message - Human-readable error message
   * @param {Object} [details] - Extra context; the base class supplies {} when omitted
   */
  constructor(message, details) {
    super(message, 'EMBEDDING_ERROR', details);

    // Override the name assigned by the base constructor.
    this.name = 'EmbeddingError';
  }
}
|
|
38
|
+
|
|
39
|
+
/**
 * Raised when a storage operation (read, write, delete) fails.
 * Always carries the error code 'STORAGE_ERROR'.
 */
class StorageError extends LanceDBError {
  /**
   * @param {string} message - Human-readable error message
   * @param {Object} [details] - Extra context; the base class supplies {} when omitted
   */
  constructor(message, details) {
    super(message, 'STORAGE_ERROR', details);

    // Override the name assigned by the base constructor.
    this.name = 'StorageError';
  }
}
|
|
48
|
+
|
|
49
|
+
/**
 * Raised when a database query fails or returns invalid results.
 * Always carries the error code 'QUERY_ERROR'.
 */
class QueryError extends LanceDBError {
  /**
   * @param {string} message - Human-readable error message
   * @param {Object} [details] - Extra context; the base class supplies {} when omitted
   */
  constructor(message, details) {
    super(message, 'QUERY_ERROR', details);

    // Override the name assigned by the base constructor.
    this.name = 'QueryError';
  }
}
|
|
58
|
+
|
|
59
|
+
/**
 * Raised when configuration is missing or invalid.
 * Always carries the error code 'CONFIGURATION_ERROR'.
 */
class ConfigurationError extends LanceDBError {
  /**
   * @param {string} message - Human-readable error message
   * @param {Object} [details] - Extra context; the base class supplies {} when omitted
   */
  constructor(message, details) {
    super(message, 'CONFIGURATION_ERROR', details);

    // Override the name assigned by the base constructor.
    this.name = 'ConfigurationError';
  }
}
|
|
68
|
+
|
|
69
|
+
/**
 * Sanitize error messages by redacting sensitive information.
 *
 * The replacements run in the order listed, so a later pattern sees the
 * output of the earlier ones (e.g. an "Authorization: Bearer ..." header is
 * first reduced to "Bearer [REDACTED]" and then to "Authorization: [REDACTED]").
 *
 * @param {string} message - Error message to sanitize
 * @returns {string} Sanitized error message, or a fixed placeholder when the
 *   input is not a string
 */
function sanitizeErrorMessage(message) {
  if (typeof message !== 'string') {
    return '[Non-string error message]';
  }

  return message
    // Bearer tokens
    .replace(/Bearer\s+[A-Za-z0-9\-._~+/]+=*/gi, 'Bearer [REDACTED]')
    // "sk-" secret keys. The body may contain '-' and '_' so that prefixed
    // formats such as "sk-proj-..." are caught as well as the plain
    // alphanumeric form. The lookbehind stops ordinary hyphenated words that
    // merely contain "sk-" (risk-..., task-...) from being wiped out.
    .replace(/(?<![A-Za-z0-9])sk-[A-Za-z0-9_-]{32,}/g, 'sk-[REDACTED]')
    // Generic API keys (20+ alphanumeric chars after api_key)
    .replace(/api_key["\s:]+[A-Za-z0-9]{20,}/gi, 'api_key: [REDACTED]')
    // Well-known secret-bearing environment variable assignments
    .replace(/(OPENAI_API_KEY|ANTHROPIC_API_KEY|GOOGLE_API_KEY)[="'\s]+[A-Za-z0-9\-_]+/gi, '$1=[REDACTED]')
    // Authorization headers (everything up to a quote or line break)
    .replace(/Authorization:\s*[^"\r\n]+/gi, 'Authorization: [REDACTED]')
    // Potential JWT tokens
    .replace(/eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]*/g, '[JWT_REDACTED]');
}
|
|
94
|
+
|
|
95
|
+
/**
 * Normalize errors into a consistent response format.
 *
 * LanceDBError instances keep their own code and details. Anything else is
 * reported as 'UNKNOWN_ERROR'; that includes non-Error values such as a
 * rejected `null`/`undefined` (handled without throwing) or a thrown string
 * (whose text is used as the message). Messages are always passed through
 * sanitizeErrorMessage(); `details` and `context` are returned as given.
 *
 * @param {*} error - The error (or arbitrary thrown value) to handle
 * @param {Object} context - Additional context about where/when the error occurred
 * @returns {Object} Formatted error response with success: false
 */
function handleError(error, context = {}) {
  if (error instanceof LanceDBError) {
    return {
      success: false,
      error: {
        code: error.code,
        message: sanitizeErrorMessage(error.message),
        details: error.details,
        context
      }
    };
  }

  // Property access on null/undefined would throw, so guard it explicitly.
  const hasProperties = error !== null && error !== undefined;
  let rawMessage;
  if (typeof error === 'string') {
    rawMessage = error;
  } else if (hasProperties) {
    rawMessage = error.message;
  }

  // Wrap unknown errors
  return {
    success: false,
    error: {
      code: 'UNKNOWN_ERROR',
      message: sanitizeErrorMessage(rawMessage),
      // Stack traces are only exposed in development.
      stack: process.env.NODE_ENV === 'development' && hasProperties ? error.stack : undefined,
      context
    }
  };
}
|
|
125
|
+
|
|
126
|
+
export {
|
|
127
|
+
LanceDBError,
|
|
128
|
+
EmbeddingError,
|
|
129
|
+
StorageError,
|
|
130
|
+
QueryError,
|
|
131
|
+
ConfigurationError,
|
|
132
|
+
handleError,
|
|
133
|
+
sanitizeErrorMessage
|
|
134
|
+
};
|
|
135
|
+
|
|
136
|
+
export default {
|
|
137
|
+
LanceDBError,
|
|
138
|
+
EmbeddingError,
|
|
139
|
+
StorageError,
|
|
140
|
+
QueryError,
|
|
141
|
+
ConfigurationError,
|
|
142
|
+
handleError,
|
|
143
|
+
sanitizeErrorMessage
|
|
144
|
+
};
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export { LanceDBClient } from './client.js';
|
|
2
|
+
export { loadConfig, validateConfig, getConfig, DEFAULTS } from './config.js';
|
|
3
|
+
export { MEMORY_SCHEMA, INDEX_CONFIG, createMemoryTable, createMemoryTableWithDimension, createMemorySchema, getEmbeddingDimension, DEFAULT_VECTOR_DIMENSION, EMBEDDING_DIMENSIONS } from './schema.js';
|
|
4
|
+
export { LanceDBError, EmbeddingError, StorageError, QueryError, ConfigurationError, handleError, sanitizeErrorMessage } from './errors.js';
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LanceDB Schema Definitions for MemoryManager
|
|
3
|
+
* Uses Apache Arrow Schema format for LanceDB JavaScript SDK
|
|
4
|
+
*
|
|
5
|
+
* Supports dynamic vector dimensions for different embedding models:
|
|
6
|
+
* - all-MiniLM-L6-v2: 384 dimensions
|
|
7
|
+
* - all-mpnet-base-v2: 768 dimensions
|
|
8
|
+
* - text-embedding-3-small: 1536 dimensions
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import * as arrow from "apache-arrow";
|
|
12
|
+
|
|
13
|
+
/**
 * Vector width assumed when no model-specific dimension is known.
 * 384 is the output size of all-MiniLM-L6-v2.
 */
export const DEFAULT_VECTOR_DIMENSION = 384;
|
|
17
|
+
|
|
18
|
+
/**
 * Output vector dimensions of common embedding models, keyed by
 * "<provider-or-namespace>/<model-name>".
 */
export const EMBEDDING_DIMENSIONS = {
  'Xenova/all-MiniLM-L6-v2': 384,
  'Xenova/all-mpnet-base-v2': 768,
  'Xenova/distiluse-base-multilingual-cased-v1': 512,
  'sentence-transformers/all-MiniLM-L6-v2': 384,
  'sentence-transformers/all-mpnet-base-v2': 768,
  'openai/text-embedding-3-small': 1536,
  'openai/text-embedding-3-large': 3072,
  // The "light" Cohere v3 model emits 384-dimensional vectors, not the 1024
  // of the full-size model below.
  'cohere/embed-english-light-v3.0': 384,
  'cohere/embed-english-v3.0': 1024,
};
|
|
32
|
+
|
|
33
|
+
/**
 * Get dimension for a given embedding model.
 *
 * Lookup order:
 *   1. exact key in EMBEDDING_DIMENSIONS;
 *   2. case-insensitive: the given name contains a full known key;
 *   3. case-insensitive: the given name is, or ends with "/" plus, the bare
 *      model part of a known key (so "text-embedding-3-small" resolves even
 *      though the table stores it as "openai/text-embedding-3-small");
 *   4. DEFAULT_VECTOR_DIMENSION.
 *
 * @param {string} modelName - Embedding model name or path
 * @returns {number} Vector dimension
 */
export function getEmbeddingDimension(modelName) {
  if (!modelName) return DEFAULT_VECTOR_DIMENSION;

  const requested = String(modelName);

  // Own-property check so inherited names such as "toString" never match.
  if (Object.prototype.hasOwnProperty.call(EMBEDDING_DIMENSIONS, requested)) {
    return EMBEDDING_DIMENSIONS[requested];
  }

  const needle = requested.toLowerCase();
  const known = Object.entries(EMBEDDING_DIMENSIONS).map(([key, dimension]) => ({
    key: key.toLowerCase(),
    dimension,
  }));

  // Full-key containment runs first so existing matches keep their result.
  for (const { key, dimension } of known) {
    if (needle.includes(key)) {
      return dimension;
    }
  }

  // Then match on the model name without its provider/namespace prefix.
  for (const { key, dimension } of known) {
    const bareName = key.slice(key.lastIndexOf('/') + 1);
    if (needle === bareName || needle.endsWith(`/${bareName}`)) {
      return dimension;
    }
  }

  return DEFAULT_VECTOR_DIMENSION;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Build the V1 memory-table schema for a given vector width.
 *
 * Columns: id, vector (fixed-size list of Float32), content, metadata,
 * created_at, updated_at. Only metadata and updated_at are nullable.
 *
 * @param {number} vectorDim - Vector dimension (e.g., 384, 768, 1536)
 * @returns {import('apache-arrow').Schema} Arrow schema with specified dimension
 */
export function createMemorySchema(vectorDim = DEFAULT_VECTOR_DIMENSION) {
  const millisTimestamp = () => new arrow.Timestamp(arrow.TimeUnit.MILLISECOND);
  const vectorItem = new arrow.Field('item', new arrow.Float32(), true);

  const columns = [
    new arrow.Field('id', new arrow.Utf8(), false),
    new arrow.Field('vector', new arrow.FixedSizeList(vectorDim, vectorItem), false),
    new arrow.Field('content', new arrow.Utf8(), false),
    // metadata is stored as a JSON string
    new arrow.Field('metadata', new arrow.Utf8(), true),
    new arrow.Field('created_at', millisTimestamp(), false),
    new arrow.Field('updated_at', millisTimestamp(), true),
  ];

  return new arrow.Schema(columns);
}
|
|
75
|
+
|
|
76
|
+
/**
 * Build the V2 memory-table schema: the six V1 columns followed by the
 * recall-related columns. Every V2 column is nullable so rows written under
 * V1 remain valid.
 *
 * @param {number} vectorDim - Vector dimension (e.g., 384, 768, 1536)
 * @returns {import('apache-arrow').Schema} Arrow schema with V2 fields
 */
function createMemorySchemaV2(vectorDim = DEFAULT_VECTOR_DIMENSION) {
  const text = (name, nullable) => new arrow.Field(name, new arrow.Utf8(), nullable);
  const millis = (name, nullable) =>
    new arrow.Field(name, new arrow.Timestamp(arrow.TimeUnit.MILLISECOND), nullable);
  const vectorItem = new arrow.Field('item', new arrow.Float32(), true);

  // V1 columns (backward compatible)
  const v1Columns = [
    text('id', false),
    new arrow.Field('vector', new arrow.FixedSizeList(vectorDim, vectorItem), false),
    text('content', false),
    text('metadata', true),
    millis('created_at', false),
    millis('updated_at', true),
  ];

  // V2 columns (all nullable)
  const v2Columns = [
    text('session_id', true), // session association
    text('agent_id', true), // agent/skill that created the memory
    text('memory_type', true), // 'global', 'session', 'agent'
    new arrow.Field('importance_score', new arrow.Float32(), true), // 0.0-1.0 importance
    new arrow.Field('access_count', new arrow.Int32(), true), // popularity tracking
    millis('last_accessed', true),
  ];

  return new arrow.Schema([...v1Columns, ...v2Columns]);
}
|
|
104
|
+
|
|
105
|
+
/**
 * Report whether a schema is V2, detected by the presence of a field named
 * 'session_id'.
 *
 * @param {import('apache-arrow').Schema} schema - Table schema to check
 * @returns {boolean} True if V2 schema detected
 */
function isSchemaV2(schema) {
  for (const field of schema.fields) {
    if (field.name === 'session_id') {
      return true;
    }
  }
  return false;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Prebuilt V1 memory-table schema at the default vector dimension (384).
 * Constructed once when the module is evaluated.
 *
 * @deprecated Use createMemorySchema(vectorDim) for dynamic dimensions
 */
const MEMORY_SCHEMA = createMemorySchema(DEFAULT_VECTOR_DIMENSION);
|
|
119
|
+
|
|
120
|
+
/**
 * Index settings for the memory table: one vector index and one full-text
 * index over `content`. Indices should be created after data is inserted.
 */
export const INDEX_CONFIG = {
  // Vector index parameters
  vector: {
    index_type: 'ivf_pq',
    metric: 'cosine',
    num_partitions: 256,
    num_sub_vectors: 8
  },
  // Columns covered by the full-text index
  full_text: {
    fields: ['content']
  }
};
|
|
135
|
+
|
|
136
|
+
/**
 * Open or create a memory table using the default vector dimension (384).
 * Thin wrapper that delegates to createMemoryTableWithDimension().
 *
 * @param {import('@lancedb/lancedb').Connection} db - LanceDB connection
 * @param {string} tableName - Name of the table to create (default: 'memory_entries')
 * @returns {Promise<import('@lancedb/lancedb').Table>} The created or opened table
 * @throws {Error} If table creation fails
 * @deprecated Use createMemoryTableWithDimension() for dynamic dimensions
 */
async function createMemoryTable(db, tableName = 'memory_entries') {
  const dimension = DEFAULT_VECTOR_DIMENSION;
  return createMemoryTableWithDimension(db, tableName, dimension);
}
|
|
147
|
+
|
|
148
|
+
/**
 * Open the named memory table, creating it with the given vector dimension
 * when it does not exist yet.
 *
 * An existing table is opened as-is: its vector dimension is NOT checked
 * against `vectorDim`.
 *
 * @param {import('@lancedb/lancedb').Connection} db - LanceDB connection
 * @param {string} tableName - Name of the table to create
 * @param {number} vectorDim - Vector dimension (384, 768, 1536, etc.)
 * @returns {Promise<import('@lancedb/lancedb').Table>} The created or opened table
 * @throws {Error} If listing, opening, or creating the table fails; the
 *   original failure is available as `error.cause`
 */
async function createMemoryTableWithDimension(db, tableName, vectorDim) {
  try {
    // Reuse the table when it already exists.
    const existingTables = await db.tableNames();
    if (existingTables.includes(tableName)) {
      return await db.openTable(tableName);
    }

    // Otherwise create an empty table with an explicit schema.
    // LanceDB v0.23.0+ accepts empty array as initial data with schema option
    const schema = createMemorySchema(vectorDim);
    return await db.createTable(tableName, [], { schema });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    // Keep the underlying error (and its stack) reachable for debugging.
    throw new Error(
      `Failed to create memory table with dimension ${vectorDim}: ${message}`,
      { cause: error }
    );
  }
}
|
|
177
|
+
|
|
178
|
+
export {
|
|
179
|
+
MEMORY_SCHEMA,
|
|
180
|
+
createMemoryTable,
|
|
181
|
+
createMemoryTableWithDimension,
|
|
182
|
+
createMemorySchemaV2,
|
|
183
|
+
isSchemaV2
|
|
184
|
+
};
|
|
185
|
+
|
|
186
|
+
export default {
|
|
187
|
+
MEMORY_SCHEMA,
|
|
188
|
+
INDEX_CONFIG,
|
|
189
|
+
createMemoryTable,
|
|
190
|
+
createMemoryTableWithDimension,
|
|
191
|
+
createMemorySchema,
|
|
192
|
+
createMemorySchemaV2,
|
|
193
|
+
isSchemaV2,
|
|
194
|
+
getEmbeddingDimension,
|
|
195
|
+
DEFAULT_VECTOR_DIMENSION,
|
|
196
|
+
EMBEDDING_DIMENSIONS
|
|
197
|
+
};
|