@yamo/memory-mesh 2.3.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/bin/memory_mesh.js +1 -1
  2. package/lib/llm/client.d.ts +111 -0
  3. package/lib/llm/client.js +299 -357
  4. package/lib/llm/client.ts +413 -0
  5. package/lib/llm/index.d.ts +17 -0
  6. package/lib/llm/index.js +15 -8
  7. package/lib/llm/index.ts +19 -0
  8. package/lib/memory/adapters/client.d.ts +183 -0
  9. package/lib/memory/adapters/client.js +518 -0
  10. package/lib/memory/adapters/client.ts +678 -0
  11. package/lib/memory/adapters/config.d.ts +137 -0
  12. package/lib/memory/adapters/config.js +189 -0
  13. package/lib/memory/adapters/config.ts +259 -0
  14. package/lib/memory/adapters/errors.d.ts +76 -0
  15. package/lib/memory/adapters/errors.js +128 -0
  16. package/lib/memory/adapters/errors.ts +166 -0
  17. package/lib/memory/context-manager.d.ts +44 -0
  18. package/lib/memory/context-manager.js +344 -0
  19. package/lib/memory/context-manager.ts +432 -0
  20. package/lib/memory/embeddings/factory.d.ts +59 -0
  21. package/lib/memory/embeddings/factory.js +148 -0
  22. package/lib/{embeddings/factory.js → memory/embeddings/factory.ts} +69 -28
  23. package/lib/memory/embeddings/index.d.ts +2 -0
  24. package/lib/memory/embeddings/index.js +2 -0
  25. package/lib/memory/embeddings/index.ts +2 -0
  26. package/lib/memory/embeddings/service.d.ts +164 -0
  27. package/lib/memory/embeddings/service.js +515 -0
  28. package/lib/{embeddings/service.js → memory/embeddings/service.ts} +223 -156
  29. package/lib/memory/index.d.ts +9 -0
  30. package/lib/memory/index.js +9 -1
  31. package/lib/memory/index.ts +20 -0
  32. package/lib/memory/memory-mesh.d.ts +274 -0
  33. package/lib/memory/memory-mesh.js +1469 -678
  34. package/lib/memory/memory-mesh.ts +1803 -0
  35. package/lib/memory/memory-translator.d.ts +19 -0
  36. package/lib/memory/memory-translator.js +125 -0
  37. package/lib/memory/memory-translator.ts +158 -0
  38. package/lib/memory/schema.d.ts +111 -0
  39. package/lib/memory/schema.js +183 -0
  40. package/lib/memory/schema.ts +267 -0
  41. package/lib/memory/scorer.d.ts +26 -0
  42. package/lib/memory/scorer.js +77 -0
  43. package/lib/memory/scorer.ts +95 -0
  44. package/lib/memory/search/index.d.ts +1 -0
  45. package/lib/memory/search/index.js +1 -0
  46. package/lib/memory/search/index.ts +1 -0
  47. package/lib/memory/search/keyword-search.d.ts +62 -0
  48. package/lib/memory/search/keyword-search.js +135 -0
  49. package/lib/{search/keyword-search.js → memory/search/keyword-search.ts} +66 -36
  50. package/lib/scrubber/config/defaults.d.ts +53 -0
  51. package/lib/scrubber/config/defaults.js +49 -57
  52. package/lib/scrubber/config/defaults.ts +117 -0
  53. package/lib/scrubber/index.d.ts +6 -0
  54. package/lib/scrubber/index.js +3 -23
  55. package/lib/scrubber/index.ts +7 -0
  56. package/lib/scrubber/scrubber.d.ts +61 -0
  57. package/lib/scrubber/scrubber.js +99 -121
  58. package/lib/scrubber/scrubber.ts +168 -0
  59. package/lib/scrubber/stages/chunker.d.ts +13 -0
  60. package/lib/scrubber/stages/metadata-annotator.d.ts +18 -0
  61. package/lib/scrubber/stages/normalizer.d.ts +13 -0
  62. package/lib/scrubber/stages/semantic-filter.d.ts +13 -0
  63. package/lib/scrubber/stages/structural-cleaner.d.ts +13 -0
  64. package/lib/scrubber/stages/validator.d.ts +18 -0
  65. package/lib/scrubber/telemetry.d.ts +36 -0
  66. package/lib/scrubber/telemetry.js +53 -58
  67. package/lib/scrubber/telemetry.ts +99 -0
  68. package/lib/utils/logger.d.ts +29 -0
  69. package/lib/utils/logger.js +64 -0
  70. package/lib/utils/logger.ts +85 -0
  71. package/lib/utils/skill-metadata.d.ts +32 -0
  72. package/lib/utils/skill-metadata.js +132 -0
  73. package/lib/utils/skill-metadata.ts +147 -0
  74. package/lib/yamo/emitter.d.ts +73 -0
  75. package/lib/yamo/emitter.js +78 -143
  76. package/lib/yamo/emitter.ts +249 -0
  77. package/lib/yamo/schema.d.ts +58 -0
  78. package/lib/yamo/schema.js +81 -108
  79. package/lib/yamo/schema.ts +165 -0
  80. package/package.json +11 -8
  81. package/index.d.ts +0 -111
  82. package/lib/embeddings/index.js +0 -2
  83. package/lib/index.js +0 -6
  84. package/lib/lancedb/client.js +0 -633
  85. package/lib/lancedb/config.js +0 -215
  86. package/lib/lancedb/errors.js +0 -144
  87. package/lib/lancedb/index.js +0 -4
  88. package/lib/lancedb/schema.js +0 -217
  89. package/lib/scrubber/errors/scrubber-error.js +0 -43
  90. package/lib/scrubber/stages/chunker.js +0 -103
  91. package/lib/scrubber/stages/metadata-annotator.js +0 -74
  92. package/lib/scrubber/stages/normalizer.js +0 -59
  93. package/lib/scrubber/stages/semantic-filter.js +0 -61
  94. package/lib/scrubber/stages/structural-cleaner.js +0 -82
  95. package/lib/scrubber/stages/validator.js +0 -66
  96. package/lib/scrubber/utils/hash.js +0 -39
  97. package/lib/scrubber/utils/html-parser.js +0 -45
  98. package/lib/scrubber/utils/pattern-matcher.js +0 -63
  99. package/lib/scrubber/utils/token-counter.js +0 -31
  100. package/lib/search/index.js +0 -1
  101. package/lib/utils/index.js +0 -1
  102. package/lib/yamo/index.js +0 -15
@@ -1,215 +0,0 @@
1
- /**
2
- * LanceDB Configuration Loader
3
- * Loads and validates configuration from environment variables
4
- */
5
-
6
- import path from "path";
7
-
8
- /**
9
- * Default configuration values
10
- */
11
- const DEFAULTS = {
12
- // LanceDB Configuration
13
- LANCEDB_URI: './runtime/data/lancedb',
14
- LANCEDB_MEMORY_TABLE: 'memory_entries',
15
- LANCEDB_MAX_CACHE_SIZE: '2GB',
16
-
17
- // Embedding Model Configuration
18
- EMBEDDING_MODEL_TYPE: 'local',
19
- EMBEDDING_MODEL_NAME: 'Xenova/all-MiniLM-L6-v2',
20
- EMBEDDING_DIMENSION: '384',
21
- EMBEDDING_BATCH_SIZE: '32',
22
- EMBEDDING_NORMALIZE: 'true',
23
-
24
- // API-based Embeddings
25
- OPENAI_EMBEDDING_MODEL: 'text-embedding-3-small',
26
-
27
- // Search Configuration
28
- DEFAULT_TOP_K: '10',
29
- DEFAULT_SIMILARITY_THRESHOLD: '0.7',
30
- ENABLE_HYBRID_SEARCH: 'true',
31
- HYBRID_SEARCH_ALPHA: '0.5',
32
-
33
- // Performance Tuning
34
- VECTOR_INDEX_TYPE: 'ivf_pq',
35
- IVF_PARTITIONS: '256',
36
- PQ_BITS: '8',
37
- ENABLE_QUERY_CACHE: 'true',
38
- QUERY_CACHE_TTL: '300'
39
- };
40
-
41
- /**
42
- * Memory system configuration defaults
43
- */
44
- const MEMORY_DEFAULTS = {
45
- // Feature flags
46
- MEMORY_ENABLED: 'true',
47
- MEMORY_AUTO_CAPTURE: 'true',
48
- MEMORY_AUTO_RECALL: 'true',
49
-
50
- // Recall settings
51
- MEMORY_MAX_CONTEXT: '5',
52
- MEMORY_RELEVANCE_THRESHOLD: '0.7',
53
- MEMORY_IMPORTANCE_BOOST: '1.5',
54
- MEMORY_RECENCY_WEIGHT: '0.3',
55
-
56
- // Capture settings
57
- MEMORY_MIN_IMPORTANCE: '0.3',
58
- MEMORY_DEDUP_THRESHOLD: '0.9',
59
- MEMORY_CAPTURE_TOOL_RESULTS: 'true',
60
- MEMORY_CAPTURE_FILE_OPS: 'true',
61
-
62
- // Retention settings
63
- MEMORY_RETENTION_ENABLED: 'true',
64
- MEMORY_RETENTION_DAYS: '90',
65
- MEMORY_MAX_PER_SESSION: '100',
66
- MEMORY_MIN_IMPORTANCE_TO_KEEP: '0.5',
67
-
68
- // Privacy settings
69
- MEMORY_REDACT_PII: 'false',
70
- MEMORY_ENCRYPTION_ENABLED: 'false',
71
- };
72
-
73
-
74
- /**
75
- * Load configuration with validation
76
- */
77
- function loadConfig() {
78
- const config = {};
79
-
80
-
81
- for (const [key, defaultValue] of Object.entries(DEFAULTS)) {
82
- config[key] = process.env[key] || defaultValue;
83
- }
84
-
85
- // Resolve relative paths to absolute
86
- if (config.LANCEDB_URI.startsWith('./') || config.LANCEDB_URI.startsWith('../')) {
87
- config.LANCEDB_URI = path.resolve(process.cwd(), config.LANCEDB_URI);
88
- }
89
-
90
- return config;
91
- }
92
-
93
- /**
94
- * Load memory-specific configuration
95
- * @returns {Object} Memory configuration object
96
- */
97
- function loadMemoryConfig() {
98
- return {
99
- enabled: process.env.MEMORY_ENABLED !== 'false',
100
- autoCapture: process.env.MEMORY_AUTO_CAPTURE !== 'false',
101
- autoRecall: process.env.MEMORY_AUTO_RECALL !== 'false',
102
- maxContext: parseInt(process.env.MEMORY_MAX_CONTEXT || '5'),
103
- relevanceThreshold: parseFloat(process.env.MEMORY_RELEVANCE_THRESHOLD || '0.7'),
104
- importanceBoost: parseFloat(process.env.MEMORY_IMPORTANCE_BOOST || '1.5'),
105
- recencyWeight: parseFloat(process.env.MEMORY_RECENCY_WEIGHT || '0.3'),
106
- minImportance: parseFloat(process.env.MEMORY_MIN_IMPORTANCE || '0.3'),
107
- dedupThreshold: parseFloat(process.env.MEMORY_DEDUP_THRESHOLD || '0.9'),
108
- captureToolResults: process.env.MEMORY_CAPTURE_TOOL_RESULTS !== 'false',
109
- captureFileOps: process.env.MEMORY_CAPTURE_FILE_OPS !== 'false',
110
- retention: {
111
- enabled: process.env.MEMORY_RETENTION_ENABLED !== 'false',
112
- days: parseInt(process.env.MEMORY_RETENTION_DAYS || '90'),
113
- maxPerSession: parseInt(process.env.MEMORY_MAX_PER_SESSION || '100'),
114
- minImportanceToKeep: parseFloat(process.env.MEMORY_MIN_IMPORTANCE_TO_KEEP || '0.5'),
115
- },
116
- privacy: {
117
- redactPii: process.env.MEMORY_REDACT_PII === 'true',
118
- encryptionEnabled: process.env.MEMORY_ENCRYPTION_ENABLED === 'true',
119
- },
120
- };
121
- }
122
-
123
- /**
124
- * Validate configuration
125
- */
126
- function validateConfig(config) {
127
- const errors = [];
128
-
129
- // Validate embedding model type
130
- const validModelTypes = ['local', 'openai', 'cohere', 'voyage'];
131
- if (!validModelTypes.includes(config.EMBEDDING_MODEL_TYPE)) {
132
- errors.push(`Invalid EMBEDDING_MODEL_TYPE: ${config.EMBEDDING_MODEL_TYPE}`);
133
- }
134
-
135
- // Validate numeric values
136
- const dimension = parseInt(config.EMBEDDING_DIMENSION);
137
- if (isNaN(dimension) || dimension <= 0) {
138
- errors.push(`Invalid EMBEDDING_DIMENSION: ${config.EMBEDDING_DIMENSION}`);
139
- }
140
-
141
- const topK = parseInt(config.DEFAULT_TOP_K);
142
- if (isNaN(topK) || topK <= 0) {
143
- errors.push(`Invalid DEFAULT_TOP_K: ${config.DEFAULT_TOP_K}`);
144
- }
145
-
146
- // Validate boolean strings
147
- const boolFields = ['EMBEDDING_NORMALIZE', 'ENABLE_HYBRID_SEARCH', 'ENABLE_QUERY_CACHE'];
148
- for (const field of boolFields) {
149
- const value = config[field].toLowerCase();
150
- if (value !== 'true' && value !== 'false') {
151
- errors.push(`Invalid ${field}: must be 'true' or 'false'`);
152
- }
153
- }
154
-
155
- // Validate similarity threshold (0-1 range)
156
- const threshold = parseFloat(config.DEFAULT_SIMILARITY_THRESHOLD);
157
- if (isNaN(threshold) || threshold < 0 || threshold > 1) {
158
- errors.push(`Invalid DEFAULT_SIMILARITY_THRESHOLD: must be between 0 and 1`);
159
- }
160
-
161
- // Validate hybrid search alpha (0-1 range)
162
- const alpha = parseFloat(config.HYBRID_SEARCH_ALPHA);
163
- if (isNaN(alpha) || alpha < 0 || alpha > 1) {
164
- errors.push(`Invalid HYBRID_SEARCH_ALPHA: must be between 0 and 1`);
165
- }
166
-
167
- // Validate positive integers
168
- const positiveIntFields = ['EMBEDDING_BATCH_SIZE', 'IVF_PARTITIONS', 'PQ_BITS', 'QUERY_CACHE_TTL'];
169
- for (const field of positiveIntFields) {
170
- const value = parseInt(config[field]);
171
- if (isNaN(value) || value <= 0) {
172
- errors.push(`Invalid ${field}: must be a positive integer`);
173
- }
174
- }
175
-
176
- // Validate cache size format (e.g., "2GB", "500MB")
177
- const cacheSizePattern = /^\d+(\.\d+)?(KB|MB|GB|TB)$/;
178
- if (!cacheSizePattern.test(config.LANCEDB_MAX_CACHE_SIZE)) {
179
- errors.push(`Invalid LANCEDB_MAX_CACHE_SIZE: must match pattern like "2GB", "500MB"`);
180
- }
181
-
182
- return errors;
183
- }
184
-
185
- /**
186
- * Get validated configuration
187
- */
188
- function getConfig() {
189
- const config = loadConfig();
190
- const errors = validateConfig(config);
191
-
192
- if (errors.length > 0) {
193
- throw new Error(`Configuration validation failed:\n${errors.join('\n')}`);
194
- }
195
-
196
- return config;
197
- }
198
-
199
- export {
200
- loadConfig,
201
- validateConfig,
202
- getConfig,
203
- loadMemoryConfig,
204
- DEFAULTS,
205
- MEMORY_DEFAULTS,
206
- };
207
-
208
- export default {
209
- loadConfig,
210
- validateConfig,
211
- getConfig,
212
- loadMemoryConfig,
213
- DEFAULTS,
214
- MEMORY_DEFAULTS,
215
- };
@@ -1,144 +0,0 @@
1
- /**
2
- * Custom error classes for LanceDB operations
3
- *
4
- * Base error class for all LanceDB-related errors. Captures proper stack traces
5
- * to ensure debugging information points to where errors are thrown, not to the
6
- * error constructor.
7
- */
8
- class LanceDBError extends Error {
9
- /**
10
- * Create a new LanceDBError
11
- * @param {string} message - Human-readable error message
12
- * @param {string} code - Machine-readable error code (e.g., 'EMBEDDING_ERROR')
13
- * @param {Object} details - Additional error context and metadata
14
- */
15
- constructor(message, code, details = {}) {
16
- super(message);
17
- this.name = 'LanceDBError';
18
- this.code = code;
19
- this.details = details;
20
- this.timestamp = new Date().toISOString();
21
-
22
- // Capture stack trace for proper debugging (Node.js best practice)
23
- // This ensures stack traces point to where the error was thrown,
24
- // not to the error constructor itself
25
- Error.captureStackTrace(this, this.constructor);
26
- }
27
- }
28
-
29
- /**
30
- * Error raised when embedding generation or comparison fails
31
- */
32
- class EmbeddingError extends LanceDBError {
33
- constructor(message, details) {
34
- super(message, 'EMBEDDING_ERROR', details);
35
- this.name = 'EmbeddingError';
36
- }
37
- }
38
-
39
- /**
40
- * Error raised when storage operations (read/write/delete) fail
41
- */
42
- class StorageError extends LanceDBError {
43
- constructor(message, details) {
44
- super(message, 'STORAGE_ERROR', details);
45
- this.name = 'StorageError';
46
- }
47
- }
48
-
49
- /**
50
- * Error raised when database queries fail or return invalid results
51
- */
52
- class QueryError extends LanceDBError {
53
- constructor(message, details) {
54
- super(message, 'QUERY_ERROR', details);
55
- this.name = 'QueryError';
56
- }
57
- }
58
-
59
- /**
60
- * Error raised when configuration is missing or invalid
61
- */
62
- class ConfigurationError extends LanceDBError {
63
- constructor(message, details) {
64
- super(message, 'CONFIGURATION_ERROR', details);
65
- this.name = 'ConfigurationError';
66
- }
67
- }
68
-
69
- /**
70
- * Sanitize error messages by redacting sensitive information
71
- * @param {string} message - Error message to sanitize
72
- * @returns {string} Sanitized error message
73
- */
74
- function sanitizeErrorMessage(message) {
75
- if (typeof message !== 'string') {
76
- return '[Non-string error message]';
77
- }
78
-
79
- // Redact common sensitive patterns
80
- return message
81
- // Redact Bearer tokens
82
- .replace(/Bearer\s+[A-Za-z0-9\-._~+/]+=*/gi, 'Bearer [REDACTED]')
83
- // Redact OpenAI API keys (sk- followed by 32+ chars)
84
- .replace(/sk-[A-Za-z0-9]{32,}/g, 'sk-[REDACTED]')
85
- // Redact generic API keys (20+ alphanumeric chars after api_key)
86
- .replace(/api_key["\s:]+[A-Za-z0-9]{20,}/gi, 'api_key: [REDACTED]')
87
- // Redact environment variable patterns that might contain secrets
88
- .replace(/(OPENAI_API_KEY|ANTHROPIC_API_KEY|GOOGLE_API_KEY)[="'\s]+[A-Za-z0-9\-_]+/gi, '$1=[REDACTED]')
89
- // Redact Authorization headers
90
- .replace(/Authorization:\s*[^"\r\n]+/gi, 'Authorization: [REDACTED]')
91
- // Redact potential JWT tokens
92
- .replace(/eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]*/g, '[JWT_REDACTED]');
93
- }
94
-
95
- /**
96
- * Normalize errors into a consistent response format
97
- * @param {Error} error - The error to handle
98
- * @param {Object} context - Additional context about where/when the error occurred
99
- * @returns {Object} Formatted error response with success: false
100
- */
101
- function handleError(error, context = {}) {
102
- if (error instanceof LanceDBError) {
103
- return {
104
- success: false,
105
- error: {
106
- code: error.code,
107
- message: sanitizeErrorMessage(error.message),
108
- details: error.details,
109
- context
110
- }
111
- };
112
- }
113
-
114
- // Wrap unknown errors
115
- return {
116
- success: false,
117
- error: {
118
- code: 'UNKNOWN_ERROR',
119
- message: sanitizeErrorMessage(error.message),
120
- stack: process.env.NODE_ENV === 'development' ? error.stack : undefined,
121
- context
122
- }
123
- };
124
- }
125
-
126
- export {
127
- LanceDBError,
128
- EmbeddingError,
129
- StorageError,
130
- QueryError,
131
- ConfigurationError,
132
- handleError,
133
- sanitizeErrorMessage
134
- };
135
-
136
- export default {
137
- LanceDBError,
138
- EmbeddingError,
139
- StorageError,
140
- QueryError,
141
- ConfigurationError,
142
- handleError,
143
- sanitizeErrorMessage
144
- };
@@ -1,4 +0,0 @@
1
- export { LanceDBClient } from './client.js';
2
- export { loadConfig, validateConfig, getConfig, DEFAULTS } from './config.js';
3
- export { MEMORY_SCHEMA, INDEX_CONFIG, createMemoryTable, createMemoryTableWithDimension, createMemorySchema, getEmbeddingDimension, DEFAULT_VECTOR_DIMENSION, EMBEDDING_DIMENSIONS } from './schema.js';
4
- export { LanceDBError, EmbeddingError, StorageError, QueryError, ConfigurationError, handleError, sanitizeErrorMessage } from './errors.js';
@@ -1,217 +0,0 @@
1
- /**
2
- * LanceDB Schema Definitions for MemoryManager
3
- * Uses Apache Arrow Schema format for LanceDB JavaScript SDK
4
- *
5
- * Supports dynamic vector dimensions for different embedding models:
6
- * - all-MiniLM-L6-v2: 384 dimensions
7
- * - all-mpnet-base-v2: 768 dimensions
8
- * - text-embedding-3-small: 1536 dimensions
9
- */
10
-
11
- import * as arrow from "apache-arrow";
12
-
13
- /**
14
- * Default vector dimension (all-MiniLM-L6-v2)
15
- */
16
- export const DEFAULT_VECTOR_DIMENSION = 384;
17
-
18
- /**
19
- * Common embedding model dimensions
20
- */
21
- export const EMBEDDING_DIMENSIONS = {
22
- 'Xenova/all-MiniLM-L6-v2': 384,
23
- 'Xenova/all-mpnet-base-v2': 768,
24
- 'Xenova/distiluse-base-multilingual-cased-v1': 512,
25
- 'sentence-transformers/all-MiniLM-L6-v2': 384,
26
- 'sentence-transformers/all-mpnet-base-v2': 768,
27
- 'openai/text-embedding-3-small': 1536,
28
- 'openai/text-embedding-3-large': 3072,
29
- 'cohere/embed-english-light-v3.0': 1024,
30
- 'cohere/embed-english-v3.0': 1024,
31
- };
32
-
33
- /**
34
- * Get dimension for a given embedding model
35
- * @param {string} modelName - Embedding model name or path
36
- * @returns {number} Vector dimension
37
- */
38
- export function getEmbeddingDimension(modelName) {
39
- if (!modelName) return DEFAULT_VECTOR_DIMENSION;
40
-
41
- // Check exact match
42
- if (EMBEDDING_DIMENSIONS[modelName]) {
43
- return EMBEDDING_DIMENSIONS[modelName];
44
- }
45
-
46
- // Check for partial matches
47
- for (const [key, dimension] of Object.entries(EMBEDDING_DIMENSIONS)) {
48
- if (modelName.toLowerCase().includes(key.toLowerCase())) {
49
- return dimension;
50
- }
51
- }
52
-
53
- // Fallback to default
54
- return DEFAULT_VECTOR_DIMENSION;
55
- }
56
-
57
- /**
58
- * Create a memory schema with a specific vector dimension
59
- * @param {number} vectorDim - Vector dimension (e.g., 384, 768, 1536)
60
- * @returns {import('apache-arrow').Schema} Arrow schema with specified dimension
61
- */
62
- export function createMemorySchema(vectorDim = DEFAULT_VECTOR_DIMENSION) {
63
- return new arrow.Schema([
64
- new arrow.Field('id', new arrow.Utf8(), false),
65
- new arrow.Field('vector',
66
- new arrow.FixedSizeList(vectorDim, new arrow.Field('item', new arrow.Float32(), true)),
67
- false
68
- ),
69
- new arrow.Field('content', new arrow.Utf8(), false),
70
- new arrow.Field('metadata', new arrow.Utf8(), true), // Stored as JSON string
71
- new arrow.Field('created_at', new arrow.Timestamp(arrow.TimeUnit.MILLISECOND), false),
72
- new arrow.Field('updated_at', new arrow.Timestamp(arrow.TimeUnit.MILLISECOND), true)
73
- ]);
74
- }
75
-
76
- /**
77
- * Create V2 memory schema with automatic recall fields
78
- * All new fields are nullable for backward compatibility
79
- * @param {number} vectorDim - Vector dimension (e.g., 384, 768, 1536)
80
- * @returns {import('apache-arrow').Schema} Arrow schema with V2 fields
81
- */
82
- function createMemorySchemaV2(vectorDim = DEFAULT_VECTOR_DIMENSION) {
83
- return new arrow.Schema([
84
- // ========== V1 Fields (Backward Compatible) ==========
85
- new arrow.Field('id', new arrow.Utf8(), false),
86
- new arrow.Field('vector',
87
- new arrow.FixedSizeList(vectorDim, new arrow.Field('item', new arrow.Float32(), true)),
88
- false
89
- ),
90
- new arrow.Field('content', new arrow.Utf8(), false),
91
- new arrow.Field('metadata', new arrow.Utf8(), true),
92
- new arrow.Field('created_at', new arrow.Timestamp(arrow.TimeUnit.MILLISECOND), false),
93
- new arrow.Field('updated_at', new arrow.Timestamp(arrow.TimeUnit.MILLISECOND), true),
94
-
95
- // ========== V2 Fields (All Nullable) ==========
96
- new arrow.Field('session_id', new arrow.Utf8(), true), // Session association
97
- new arrow.Field('agent_id', new arrow.Utf8(), true), // Agent/skill that created memory
98
- new arrow.Field('memory_type', new arrow.Utf8(), true), // 'global', 'session', 'agent'
99
- new arrow.Field('importance_score', new arrow.Float32(), true), // 0.0-1.0 importance
100
- new arrow.Field('access_count', new arrow.Int32(), true), // Popularity tracking
101
- new arrow.Field('last_accessed', new arrow.Timestamp(arrow.TimeUnit.MILLISECOND), true),
102
- ]);
103
- }
104
-
105
- /**
106
- * Create schema for synthesized skills (Recursive Skill Synthesis)
107
- * @param {number} vectorDim - Vector dimension for intent embedding
108
- * @returns {import('apache-arrow').Schema} Arrow schema
109
- */
110
- export function createSynthesizedSkillSchema(vectorDim = DEFAULT_VECTOR_DIMENSION) {
111
- return new arrow.Schema([
112
- new arrow.Field('id', new arrow.Utf8(), false),
113
- new arrow.Field('name', new arrow.Utf8(), false),
114
- new arrow.Field('intent', new arrow.Utf8(), false),
115
- new arrow.Field('yamo_text', new arrow.Utf8(), false),
116
- new arrow.Field('vector',
117
- new arrow.FixedSizeList(vectorDim, new arrow.Field('item', new arrow.Float32(), true)),
118
- false
119
- ),
120
- new arrow.Field('metadata', new arrow.Utf8(), true), // Stored as JSON: {reliability, use_count, created_at}
121
- new arrow.Field('created_at', new arrow.Timestamp(arrow.TimeUnit.MILLISECOND), false)
122
- ]);
123
- }
124
-
125
- /**
126
- * Check if a table is using V2 schema
127
- * @param {import('apache-arrow').Schema} schema - Table schema to check
128
- * @returns {boolean} True if V2 schema detected
129
- */
130
- function isSchemaV2(schema) {
131
- return schema.fields.some(f => f.name === 'session_id');
132
- }
133
-
134
- /**
135
- * Memory table schema using Apache Arrow format (default 384 dimensions)
136
- * @deprecated Use createMemorySchema(vectorDim) for dynamic dimensions
137
- */
138
- const MEMORY_SCHEMA = createMemorySchema(DEFAULT_VECTOR_DIMENSION);
139
-
140
- /**
141
- * Index configuration for memory table
142
- * Indices should be created after data is inserted
143
- */
144
- export const INDEX_CONFIG = {
145
- vector: {
146
- index_type: 'ivf_pq',
147
- metric: 'cosine',
148
- num_partitions: 256,
149
- num_sub_vectors: 8
150
- },
151
- full_text: {
152
- fields: ['content']
153
- }
154
- };
155
-
156
- /**
157
- * Creates a memory table in LanceDB with the predefined schema (384 dimensions)
158
- * @param {import('@lancedb/lancedb').Connection} db - LanceDB connection
159
- * @param {string} tableName - Name of the table to create (default: 'memory_entries')
160
- * @returns {Promise<import('@lancedb/lancedb').Table>} The created or opened table
161
- * @throws {Error} If table creation fails
162
- * @deprecated Use createMemoryTableWithDimension() for dynamic dimensions
163
- */
164
- async function createMemoryTable(db, tableName = 'memory_entries') {
165
- return createMemoryTableWithDimension(db, tableName, DEFAULT_VECTOR_DIMENSION);
166
- }
167
-
168
- /**
169
- * Creates a memory table in LanceDB with a specific vector dimension
170
- * @param {import('@lancedb/lancedb').Connection} db - LanceDB connection
171
- * @param {string} tableName - Name of the table to create
172
- * @param {number} vectorDim - Vector dimension (384, 768, 1536, etc.)
173
- * @returns {Promise<import('@lancedb/lancedb').Table>} The created or opened table
174
- * @throws {Error} If table creation fails
175
- */
176
- async function createMemoryTableWithDimension(db, tableName, vectorDim) {
177
- try {
178
- // Check if table already exists
179
- const existingTables = await db.tableNames();
180
-
181
- if (existingTables.includes(tableName)) {
182
- return await db.openTable(tableName);
183
- }
184
-
185
- // Create schema with specified dimension
186
- const schema = createMemorySchema(vectorDim);
187
-
188
- // Create table with schema
189
- // LanceDB v0.23.0+ accepts empty array as initial data with schema option
190
- const table = await db.createTable(tableName, [], { schema });
191
- return table;
192
- } catch (error) {
193
- const message = error instanceof Error ? error.message : String(error);
194
- throw new Error(`Failed to create memory table with dimension ${vectorDim}: ${message}`);
195
- }
196
- }
197
-
198
- export {
199
- MEMORY_SCHEMA,
200
- createMemoryTable,
201
- createMemoryTableWithDimension,
202
- createMemorySchemaV2,
203
- isSchemaV2
204
- };
205
-
206
- export default {
207
- MEMORY_SCHEMA,
208
- INDEX_CONFIG,
209
- createMemoryTable,
210
- createMemoryTableWithDimension,
211
- createMemorySchema,
212
- createMemorySchemaV2,
213
- isSchemaV2,
214
- getEmbeddingDimension,
215
- DEFAULT_VECTOR_DIMENSION,
216
- EMBEDDING_DIMENSIONS
217
- };
@@ -1,43 +0,0 @@
1
- /**
2
- * S-MORA Layer 0 Scrubber Error Classes
3
- * @module smora/scrubber/errors/scrubber-error
4
- */
5
-
6
- export class ScrubberError extends Error {
7
- constructor(message, details = {}) {
8
- super(message);
9
- this.name = 'ScrubberError';
10
- this.details = details;
11
- this.timestamp = new Date().toISOString();
12
- }
13
-
14
- toJSON() {
15
- return {
16
- name: this.name,
17
- message: this.message,
18
- details: this.details,
19
- timestamp: this.timestamp
20
- };
21
- }
22
- }
23
-
24
- export class StructuralCleaningError extends ScrubberError {
25
- constructor(message, details = {}) {
26
- super(message, details);
27
- this.name = 'StructuralCleaningError';
28
- }
29
- }
30
-
31
- export class ChunkingError extends ScrubberError {
32
- constructor(message, details = {}) {
33
- super(message, details);
34
- this.name = 'ChunkingError';
35
- }
36
- }
37
-
38
- export class ValidationError extends ScrubberError {
39
- constructor(message, details = {}) {
40
- super(message, details);
41
- this.name = 'ValidationError';
42
- }
43
- }