@danielsimonjr/memory-mcp 0.7.2 → 0.47.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/__tests__/edge-cases/edge-cases.test.js +406 -0
- package/dist/__tests__/file-path.test.js +5 -5
- package/dist/__tests__/integration/workflows.test.js +449 -0
- package/dist/__tests__/knowledge-graph.test.js +8 -3
- package/dist/__tests__/performance/benchmarks.test.js +413 -0
- package/dist/__tests__/unit/core/EntityManager.test.js +334 -0
- package/dist/__tests__/unit/core/GraphStorage.test.js +205 -0
- package/dist/__tests__/unit/core/RelationManager.test.js +274 -0
- package/dist/__tests__/unit/features/CompressionManager.test.js +350 -0
- package/dist/__tests__/unit/search/BasicSearch.test.js +311 -0
- package/dist/__tests__/unit/search/BooleanSearch.test.js +432 -0
- package/dist/__tests__/unit/search/FuzzySearch.test.js +448 -0
- package/dist/__tests__/unit/search/RankedSearch.test.js +379 -0
- package/dist/__tests__/unit/utils/levenshtein.test.js +77 -0
- package/dist/core/EntityManager.js +554 -0
- package/dist/core/GraphStorage.js +172 -0
- package/dist/core/KnowledgeGraphManager.js +423 -0
- package/dist/core/ObservationManager.js +129 -0
- package/dist/core/RelationManager.js +186 -0
- package/dist/core/TransactionManager.js +389 -0
- package/dist/core/index.js +9 -0
- package/dist/features/AnalyticsManager.js +222 -0
- package/dist/features/ArchiveManager.js +74 -0
- package/dist/features/BackupManager.js +311 -0
- package/dist/features/CompressionManager.js +291 -0
- package/dist/features/ExportManager.js +305 -0
- package/dist/features/HierarchyManager.js +219 -0
- package/dist/features/ImportExportManager.js +50 -0
- package/dist/features/ImportManager.js +328 -0
- package/dist/features/TagManager.js +210 -0
- package/dist/features/index.js +12 -0
- package/dist/index.js +13 -996
- package/dist/memory.jsonl +18 -0
- package/dist/search/BasicSearch.js +131 -0
- package/dist/search/BooleanSearch.js +283 -0
- package/dist/search/FuzzySearch.js +96 -0
- package/dist/search/RankedSearch.js +190 -0
- package/dist/search/SavedSearchManager.js +145 -0
- package/dist/search/SearchFilterChain.js +187 -0
- package/dist/search/SearchManager.js +305 -0
- package/dist/search/SearchSuggestions.js +57 -0
- package/dist/search/TFIDFIndexManager.js +217 -0
- package/dist/search/index.js +14 -0
- package/dist/server/MCPServer.js +52 -0
- package/dist/server/toolDefinitions.js +732 -0
- package/dist/server/toolHandlers.js +117 -0
- package/dist/types/analytics.types.js +6 -0
- package/dist/types/entity.types.js +7 -0
- package/dist/types/import-export.types.js +7 -0
- package/dist/types/index.js +12 -0
- package/dist/types/search.types.js +7 -0
- package/dist/types/tag.types.js +6 -0
- package/dist/utils/constants.js +128 -0
- package/dist/utils/dateUtils.js +89 -0
- package/dist/utils/entityUtils.js +108 -0
- package/dist/utils/errors.js +121 -0
- package/dist/utils/filterUtils.js +155 -0
- package/dist/utils/index.js +39 -0
- package/dist/utils/levenshtein.js +62 -0
- package/dist/utils/logger.js +33 -0
- package/dist/utils/paginationUtils.js +81 -0
- package/dist/utils/pathUtils.js +115 -0
- package/dist/utils/responseFormatter.js +55 -0
- package/dist/utils/schemas.js +184 -0
- package/dist/utils/searchCache.js +209 -0
- package/dist/utils/tagUtils.js +107 -0
- package/dist/utils/tfidf.js +90 -0
- package/dist/utils/validationHelper.js +99 -0
- package/dist/utils/validationUtils.js +109 -0
- package/package.json +82 -48
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Validation Schemas
|
|
3
|
+
*
|
|
4
|
+
* Zod schemas for input validation across the memory system.
|
|
5
|
+
* Provides runtime type safety and data validation.
|
|
6
|
+
*
|
|
7
|
+
* @module utils/schemas
|
|
8
|
+
*/
|
|
9
|
+
import { z } from 'zod';
|
|
10
|
+
import { IMPORTANCE_RANGE } from './constants.js';
|
|
11
|
+
/**
 * Importance bounds, taken from the centralized IMPORTANCE_RANGE constant.
 */
const MIN_IMPORTANCE = IMPORTANCE_RANGE.MIN;
const MAX_IMPORTANCE = IMPORTANCE_RANGE.MAX;
/**
 * ISO 8601 date string validation.
 * Accepts standard ISO format: YYYY-MM-DDTHH:mm:ss.sssZ
 */
const isoDateSchema = z.string().datetime({ message: 'Must be a valid ISO 8601 date string' });
/**
 * Entity name validation.
 * Must be a non-empty string of at most 500 characters after trimming.
 *
 * `.trim()` is chained first on purpose: Zod applies string checks and
 * transforms in declaration order, so trimming up front makes the length
 * limits apply to the trimmed value and rejects whitespace-only names
 * (which would otherwise pass `min(1)` and then be trimmed to '').
 */
const entityNameSchema = z.string()
    .trim()
    .min(1, 'Entity name cannot be empty')
    .max(500, 'Entity name cannot exceed 500 characters');
/**
 * Entity type validation.
 * Must be a non-empty string (e.g., "person", "project", "concept") of at
 * most 100 characters after trimming. Trimmed before the length checks so
 * that whitespace-only input is rejected.
 */
const entityTypeSchema = z.string()
    .trim()
    .min(1, 'Entity type cannot be empty')
    .max(100, 'Entity type cannot exceed 100 characters');
/**
 * Observation validation.
 * Each observation must be a non-empty string of at most 5000 characters.
 * Observations are not trimmed.
 */
const observationSchema = z.string()
    .min(1, 'Observation cannot be empty')
    .max(5000, 'Observation cannot exceed 5000 characters');
/**
 * Tag validation.
 * Tags are trimmed, must then be 1-100 characters long, and are finally
 * normalized to lowercase. Trimming happens before the length checks so
 * that whitespace-only tags are rejected.
 */
const tagSchema = z.string()
    .trim()
    .min(1, 'Tag cannot be empty')
    .max(100, 'Tag cannot exceed 100 characters')
    .toLowerCase();
/**
 * Importance validation.
 * Must be an integer between MIN_IMPORTANCE and MAX_IMPORTANCE (inclusive).
 */
const importanceSchema = z.number()
    .int('Importance must be an integer')
    .min(MIN_IMPORTANCE, `Importance must be at least ${MIN_IMPORTANCE}`)
    .max(MAX_IMPORTANCE, `Importance must be at most ${MAX_IMPORTANCE}`);
/**
 * Relation type validation.
 * Should be in snake_case format (e.g., "works_at", "manages"); only the
 * length (1-100 characters after trimming) is actually enforced here.
 */
const relationTypeSchema = z.string()
    .trim()
    .min(1, 'Relation type cannot be empty')
    .max(100, 'Relation type cannot exceed 100 characters');
|
|
69
|
+
// Optional field validators shared by the entity schemas below.
const optionalTimestamp = isoDateSchema.optional();
const optionalTagList = z.array(tagSchema).optional();
const optionalImportance = importanceSchema.optional();
const optionalParentId = entityNameSchema.optional();
/**
 * Schema for a complete entity record, timestamps included.
 * Declared strict, so keys outside this shape fail validation.
 */
export const EntitySchema = z
    .object({
        name: entityNameSchema,
        entityType: entityTypeSchema,
        observations: z.array(observationSchema),
        createdAt: optionalTimestamp,
        lastModified: optionalTimestamp,
        tags: optionalTagList,
        importance: optionalImportance,
        parentId: optionalParentId,
    })
    .strict();
/**
 * Schema for user input when creating an entity.
 * Same fields as EntitySchema but not strict. Timestamps may be omitted;
 * they are expected to be generated by the caller when absent.
 */
export const CreateEntitySchema = z.object({
    name: entityNameSchema,
    entityType: entityTypeSchema,
    observations: z.array(observationSchema),
    tags: optionalTagList,
    importance: optionalImportance,
    parentId: optionalParentId,
    createdAt: optionalTimestamp,
    lastModified: optionalTimestamp,
});
/**
 * Schema for partial entity updates: every field is optional.
 * There is deliberately no `name` field, because the name is the entity's
 * unique identifier and cannot be changed through an update.
 */
export const UpdateEntitySchema = z.object({
    entityType: entityTypeSchema.optional(),
    observations: z.array(observationSchema).optional(),
    tags: optionalTagList,
    importance: optionalImportance,
    parentId: optionalParentId,
});
|
|
110
|
+
// Field validators common to both relation schemas (same keys, same order).
const relationFields = {
    from: entityNameSchema,
    to: entityNameSchema,
    relationType: relationTypeSchema,
    createdAt: isoDateSchema.optional(),
    lastModified: isoDateSchema.optional(),
};
/**
 * Schema for a complete relation record, timestamps included.
 * Declared strict, so keys outside this shape fail validation.
 */
export const RelationSchema = z.object(relationFields).strict();
/**
 * Schema for user input when creating a relation.
 * Same fields as RelationSchema but not strict. Timestamps may be omitted;
 * they are expected to be generated by the caller when absent.
 */
export const CreateRelationSchema = z.object(relationFields);
|
|
133
|
+
/**
 * Search query validation.
 * A query must contain 1-1000 characters after trimming.
 *
 * `.trim()` is chained first on purpose: Zod applies string checks and
 * transforms in declaration order, so trimming up front makes a
 * whitespace-only query fail `min(1)` instead of being accepted and then
 * trimmed down to an empty string.
 */
export const SearchQuerySchema = z.string()
    .trim()
    .min(1, 'Search query cannot be empty')
    .max(1000, 'Search query cannot exceed 1000 characters');
|
|
141
|
+
/**
 * Date range used by search filters.
 * Both bounds are ISO date strings and `start` must not be later than `end`.
 */
export const DateRangeSchema = z
    .object({
        start: isoDateSchema,
        end: isoDateSchema,
    })
    .refine(({ start, end }) => new Date(start) <= new Date(end), {
        message: 'Start date must be before or equal to end date',
    });
/**
 * Tag alias definition for TagManager: one canonical tag plus at least one alias.
 */
export const TagAliasSchema = z.object({
    canonical: tagSchema,
    aliases: z.array(tagSchema).min(1, 'Must have at least one alias'),
});
/**
 * Supported export formats.
 */
export const ExportFormatSchema = z.enum(['json', 'graphml', 'csv']);
/**
 * Batch entity creation input.
 * At most 1000 entities per batch; an empty array is accepted (no-op).
 */
export const BatchCreateEntitiesSchema = z
    .array(CreateEntitySchema)
    .max(1000, 'Cannot create more than 1000 entities in a single batch');
/**
 * Batch relation creation input.
 * At most 1000 relations per batch; an empty array is accepted (no-op).
 */
export const BatchCreateRelationsSchema = z
    .array(CreateRelationSchema)
    .max(1000, 'Cannot create more than 1000 relations in a single batch');
/**
 * Entity names for batch deletion: between 1 and 1000 names.
 */
export const EntityNamesSchema = z
    .array(entityNameSchema)
    .min(1, 'Must specify at least one entity name')
    .max(1000, 'Cannot delete more than 1000 entities in a single batch');
/**
 * Relations for batch deletion: between 1 and 1000 relations, each
 * validated with the relation creation schema.
 */
export const DeleteRelationsSchema = z
    .array(CreateRelationSchema)
    .min(1, 'Must specify at least one relation')
    .max(1000, 'Cannot delete more than 1000 relations in a single batch');
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search Result Cache
|
|
3
|
+
*
|
|
4
|
+
* Simple LRU-style cache for search results with TTL support.
|
|
5
|
+
* Improves performance for repeated queries without external dependencies.
|
|
6
|
+
*
|
|
7
|
+
* @module utils/searchCache
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Simple LRU cache implementation for search results.
 *
 * Features:
 * - Maximum size limit (least recently used entry is evicted when full)
 * - TTL-based expiration (checked lazily on get/has, or via cleanupExpired)
 * - Cache statistics tracking (hits / misses)
 * - Key generation from query parameters
 */
export class SearchCache {
    maxSize;
    ttlMs;
    // key -> { value, timestamp, expiresAt }
    cache = new Map();
    // Keys ordered from least recently used (front) to most recently used (back).
    accessOrder = [];
    hits = 0;
    misses = 0;
    /**
     * @param maxSize - Maximum number of entries kept before LRU eviction (default 500)
     * @param ttlMs - Entry lifetime in milliseconds (default 5 minutes)
     */
    constructor(maxSize = 500, ttlMs = 5 * 60 * 1000) {
        this.maxSize = maxSize;
        this.ttlMs = ttlMs;
    }
    /**
     * Generate cache key from query parameters.
     *
     * Top-level keys are sorted so that `{a, b}` and `{b, a}` map to the same
     * key; each value is serialized with JSON.stringify. Note that an empty
     * params object produces the empty string as its key.
     *
     * @param params - Query parameters object
     * @returns String key of the form `key:json|key:json|...`
     */
    generateKey(params) {
        return Object.keys(params)
            .sort()
            .map(key => `${key}:${JSON.stringify(params[key])}`)
            .join('|');
    }
    /**
     * Get value from cache.
     *
     * A hit moves the entry to the most-recently-used position. A missing or
     * expired entry counts as a miss; expired entries are removed on the spot.
     *
     * @param params - Query parameters to generate cache key
     * @returns Cached value or undefined if not found/expired
     */
    get(params) {
        const key = this.generateKey(params);
        const entry = this.cache.get(key);
        if (!entry) {
            this.misses++;
            return undefined;
        }
        if (Date.now() > entry.expiresAt) {
            this.cache.delete(key);
            this.removeFromAccessOrder(key);
            this.misses++;
            return undefined;
        }
        // Move to end = most recently used.
        this.removeFromAccessOrder(key);
        this.accessOrder.push(key);
        this.hits++;
        return entry.value;
    }
    /**
     * Set value in cache.
     *
     * Overwriting an existing key refreshes its TTL and recency. Inserting a
     * new key into a full cache evicts the least recently used entry first.
     *
     * @param params - Query parameters to generate cache key
     * @param value - Value to cache
     */
    set(params, value) {
        const key = this.generateKey(params);
        if (this.cache.has(key)) {
            // Existing entry: drop its old recency slot; it is re-added below.
            this.removeFromAccessOrder(key);
        }
        else if (this.cache.size >= this.maxSize) {
            const lruKey = this.accessOrder.shift();
            // Compare against undefined rather than truthiness: '' is a valid
            // key (generateKey({}) returns it) and must still be evicted,
            // otherwise the entry is orphaned and the cache outgrows maxSize.
            if (lruKey !== undefined) {
                this.cache.delete(lruKey);
            }
        }
        // Read the clock once so timestamp and expiresAt are consistent.
        const now = Date.now();
        this.cache.set(key, {
            value,
            timestamp: now,
            expiresAt: now + this.ttlMs,
        });
        this.accessOrder.push(key);
    }
    /**
     * Invalidate all cached entries. Hit/miss statistics are left untouched.
     */
    clear() {
        this.cache.clear();
        this.accessOrder = [];
    }
    /**
     * Remove specific entry from access order (no-op if the key is absent).
     *
     * @param key - Cache key as produced by generateKey
     */
    removeFromAccessOrder(key) {
        const index = this.accessOrder.indexOf(key);
        if (index > -1) {
            this.accessOrder.splice(index, 1);
        }
    }
    /**
     * Get cache statistics.
     *
     * @returns Object with hits, misses, current size and hitRate
     *          (hits / (hits + misses), or 0 when nothing was looked up yet)
     */
    getStats() {
        const total = this.hits + this.misses;
        return {
            hits: this.hits,
            misses: this.misses,
            size: this.cache.size,
            hitRate: total > 0 ? this.hits / total : 0,
        };
    }
    /**
     * Reset cache statistics (hit and miss counters) without touching entries.
     */
    resetStats() {
        this.hits = 0;
        this.misses = 0;
    }
    /**
     * Clean up expired entries.
     *
     * Expiration is otherwise only checked when an entry is looked up, so this
     * should be called periodically to prevent memory buildup.
     */
    cleanupExpired() {
        const now = Date.now();
        // Collect first, then delete, to avoid mutating the map mid-iteration.
        const expiredKeys = [];
        for (const [key, entry] of this.cache.entries()) {
            if (now > entry.expiresAt) {
                expiredKeys.push(key);
            }
        }
        for (const key of expiredKeys) {
            this.cache.delete(key);
            this.removeFromAccessOrder(key);
        }
    }
    /**
     * Get current cache size (number of stored entries, expired ones included
     * until they are removed).
     */
    get size() {
        return this.cache.size;
    }
    /**
     * Check if cache has a live (non-expired) entry for params.
     *
     * Does not affect hit/miss statistics or recency; an expired entry is
     * removed as a side effect.
     *
     * @param params - Query parameters to generate cache key
     * @returns true if a non-expired entry exists
     */
    has(params) {
        const key = this.generateKey(params);
        const entry = this.cache.get(key);
        if (!entry) {
            return false;
        }
        if (Date.now() > entry.expiresAt) {
            this.cache.delete(key);
            this.removeFromAccessOrder(key);
            return false;
        }
        return true;
    }
}
|
|
170
|
+
/**
 * Module-level cache instances, one per search type, each created with the
 * SearchCache defaults.
 */
export const searchCaches = {
    basic: new SearchCache(),
    ranked: new SearchCache(),
    boolean: new SearchCache(),
    fuzzy: new SearchCache(),
};
/**
 * Empty every search cache.
 *
 * Intended to be called whenever the graph is modified, so stale results
 * are never served.
 */
export function clearAllSearchCaches() {
    const { basic, ranked, boolean, fuzzy } = searchCaches;
    for (const cache of [basic, ranked, boolean, fuzzy]) {
        cache.clear();
    }
}
/**
 * Collect the statistics of every search cache.
 *
 * @returns Object keyed by search type (basic, ranked, boolean, fuzzy) whose
 *          values are the corresponding getStats() results
 */
export function getAllCacheStats() {
    const { basic, ranked, boolean, fuzzy } = searchCaches;
    return {
        basic: basic.getStats(),
        ranked: ranked.getStats(),
        boolean: boolean.getStats(),
        fuzzy: fuzzy.getStats(),
    };
}
/**
 * Purge expired entries from every search cache.
 */
export function cleanupAllCaches() {
    const { basic, ranked, boolean, fuzzy } = searchCaches;
    for (const cache of [basic, ranked, boolean, fuzzy]) {
        cache.cleanupExpired();
    }
}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tag Normalization and Matching Utilities
|
|
3
|
+
*
|
|
4
|
+
* Centralizes tag operations to eliminate duplicate normalization logic
|
|
5
|
+
* across the codebase. All tags are normalized to lowercase for consistent matching.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Normalizes a single tag to lowercase and trimmed.
 *
 * @param tag - Tag to normalize
 * @returns Normalized tag
 */
export function normalizeTag(tag) {
    return tag.toLowerCase().trim();
}
/**
 * Normalizes an array of tags (lowercase, trimmed).
 * Handles undefined/null input gracefully.
 *
 * Delegates to normalizeTag so that single-tag and multi-tag normalization
 * can never disagree.
 *
 * @param tags - Array of tags to normalize, or undefined
 * @returns Normalized tags array, or empty array if input is undefined/null
 */
export function normalizeTags(tags) {
    if (!tags || tags.length === 0) {
        return [];
    }
    return tags.map(tag => normalizeTag(tag));
}
/**
 * Checks if an entity's tags include any of the specified search tags.
 * Both inputs are normalized before comparison.
 *
 * @param entityTags - Tags on the entity (may be undefined)
 * @param searchTags - Tags to search for (may be undefined)
 * @returns true if any search tag matches any entity tag, false if no match or either is empty
 */
export function hasMatchingTag(entityTags, searchTags) {
    if (!entityTags || entityTags.length === 0) {
        return false;
    }
    if (!searchTags || searchTags.length === 0) {
        return false;
    }
    const entityTagSet = new Set(normalizeTags(entityTags));
    return normalizeTags(searchTags).some(tag => entityTagSet.has(tag));
}
/**
 * Checks if entity tags include ALL of the specified required tags.
 * Both inputs are normalized before comparison.
 *
 * Note: an entity without tags never matches, even when no tags are required.
 *
 * @param entityTags - Tags on the entity (may be undefined)
 * @param requiredTags - All tags that must be present (undefined/null is treated as empty)
 * @returns true if all required tags are present
 */
export function hasAllTags(entityTags, requiredTags) {
    if (!entityTags || entityTags.length === 0) {
        return false;
    }
    if (!requiredTags || requiredTags.length === 0) {
        return true;
    }
    const entityTagSet = new Set(normalizeTags(entityTags));
    return normalizeTags(requiredTags).every(tag => entityTagSet.has(tag));
}
/**
 * Filters entities by tag match.
 * Returns the input array itself if searchTags is empty or undefined.
 *
 * @param entities - Array of entities with optional tags property
 * @param searchTags - Tags to filter by
 * @returns Filtered entities that have at least one matching tag
 */
export function filterByTags(entities, searchTags) {
    if (!searchTags || searchTags.length === 0) {
        return entities;
    }
    // Normalize the search tags once instead of once per entity.
    const wanted = new Set(normalizeTags(searchTags));
    return entities.filter(entity => normalizeTags(entity.tags).some(tag => wanted.has(tag)));
}
/**
 * Adds new tags to an existing tag array, avoiding duplicates.
 * All tags are normalized; the first occurrence of each tag determines its
 * position, so existing tags keep their relative order.
 *
 * Duplicates are removed across the whole result, including duplicates that
 * were already present inside either input (e.g. "B" and "b").
 *
 * @param existingTags - Current tags (may be undefined)
 * @param newTags - Tags to add (may be undefined)
 * @returns Combined tags array with no duplicates
 */
export function addUniqueTags(existingTags, newTags) {
    return [...new Set([...normalizeTags(existingTags), ...normalizeTags(newTags)])];
}
/**
 * Removes specified tags from an existing tag array.
 * Comparison uses normalized tags on both sides; surviving tags are returned
 * exactly as they were passed in (not normalized).
 *
 * @param existingTags - Current tags (may be undefined)
 * @param tagsToRemove - Tags to remove (may be undefined)
 * @returns Tags array with specified tags removed
 */
export function removeTags(existingTags, tagsToRemove) {
    if (!existingTags || existingTags.length === 0) {
        return [];
    }
    const toRemove = new Set(normalizeTags(tagsToRemove));
    return existingTags.filter(tag => !toRemove.has(normalizeTag(tag)));
}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TF-IDF (Term Frequency-Inverse Document Frequency) Utilities
|
|
3
|
+
*
|
|
4
|
+
* Algorithms for calculating TF-IDF scores used in ranked search.
|
|
5
|
+
* TF-IDF measures how important a term is to a document in a collection.
|
|
6
|
+
*
|
|
7
|
+
* @module utils/tfidf
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Term frequency of an already-lowercased term within a token list.
 * Private helper shared by calculateTF and calculateMultiTermTFIDF.
 */
function termFrequency(termLower, tokens) {
    if (tokens.length === 0) {
        return 0;
    }
    let occurrences = 0;
    for (const token of tokens) {
        if (token === termLower) {
            occurrences++;
        }
    }
    return occurrences / tokens.length;
}
/**
 * Inverse document frequency of an already-lowercased term, given one token
 * Set per document. Private helper shared by calculateIDF and
 * calculateMultiTermTFIDF.
 */
function inverseDocumentFrequency(termLower, documentTokenSets) {
    if (documentTokenSets.length === 0) {
        return 0;
    }
    let docsWithTerm = 0;
    for (const tokenSet of documentTokenSets) {
        if (tokenSet.has(termLower)) {
            docsWithTerm++;
        }
    }
    // A term that appears nowhere scores 0 instead of dividing by zero.
    if (docsWithTerm === 0) {
        return 0;
    }
    return Math.log(documentTokenSets.length / docsWithTerm);
}
/**
 * Calculate Term Frequency (TF) for a term in a document.
 *
 * TF = (Number of times term appears in document) / (Total terms in document)
 *
 * The term is lowercased and compared against whole tokens (see tokenize).
 *
 * @param term - The search term
 * @param document - The document text
 * @returns Term frequency (0.0 to 1.0); 0 for a document without tokens
 */
export function calculateTF(term, document) {
    return termFrequency(term.toLowerCase(), tokenize(document));
}
/**
 * Calculate Inverse Document Frequency (IDF) for a term across documents.
 *
 * IDF = log(Total documents / Documents containing term), natural logarithm.
 *
 * @param term - The search term
 * @param documents - Array of document texts
 * @returns Inverse document frequency; 0 when there are no documents or no
 *          document contains the term
 */
export function calculateIDF(term, documents) {
    if (documents.length === 0) {
        return 0;
    }
    const tokenSets = documents.map(doc => new Set(tokenize(doc)));
    return inverseDocumentFrequency(term.toLowerCase(), tokenSets);
}
/**
 * Calculate TF-IDF score for a term in a document.
 *
 * TF-IDF = TF * IDF
 *
 * Higher scores indicate more important/relevant terms.
 *
 * @param term - The search term
 * @param document - The document text
 * @param documents - Array of all documents
 * @returns TF-IDF score
 */
export function calculateTFIDF(term, document, documents) {
    const tf = calculateTF(term, document);
    const idf = calculateIDF(term, documents);
    return tf * idf;
}
/**
 * Tokenize text into lowercase words.
 *
 * Everything that is not a letter, combining mark, digit, underscore or
 * whitespace is treated as a separator; the result is split on whitespace.
 * Unicode property escapes are used instead of `\w` so that non-ASCII
 * letters (e.g. "café") stay inside their token rather than being stripped
 * like punctuation.
 *
 * @param text - Text to tokenize
 * @returns Array of lowercase tokens (empty strings are dropped)
 */
export function tokenize(text) {
    return text
        .toLowerCase()
        .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
        .split(/\s+/)
        .filter(token => token.length > 0);
}
/**
 * Calculate TF-IDF scores for multiple search terms.
 *
 * The document and the corpus are tokenized once and reused for every term,
 * rather than being re-tokenized per term.
 *
 * @param terms - Array of search terms
 * @param document - The document text
 * @param documents - Array of all documents
 * @returns Map of term (as given, not lowercased) to TF-IDF score
 */
export function calculateMultiTermTFIDF(terms, document, documents) {
    const scores = new Map();
    // Tokenized lazily so that an empty `terms` list touches neither input.
    let documentTokens;
    let corpusTokenSets;
    for (const term of terms) {
        documentTokens ??= tokenize(document);
        corpusTokenSets ??= documents.map(doc => new Set(tokenize(doc)));
        const termLower = term.toLowerCase();
        const tf = termFrequency(termLower, documentTokens);
        const idf = inverseDocumentFrequency(termLower, corpusTokenSets);
        scores.set(term, tf * idf);
    }
    return scores;
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Zod Schema Validation Helper
|
|
3
|
+
*
|
|
4
|
+
* Centralizes Zod validation patterns to eliminate redundant error formatting
|
|
5
|
+
* and validation logic across the codebase.
|
|
6
|
+
*/
|
|
7
|
+
import { ValidationError } from './errors.js';
|
|
8
|
+
/**
 * Turns the issues of a Zod error into readable one-line messages.
 *
 * Each message is the issue's text, prefixed with its dot-joined path and a
 * colon when the path is non-empty (e.g. "tags.0: Tag cannot be empty").
 *
 * @param error - Zod error object (anything exposing an `issues` array)
 * @returns Array of formatted error messages, one per issue
 */
export function formatZodErrors(error) {
    const describe = ({ path, message }) => {
        if (path.length === 0) {
            return `${message}`;
        }
        return `${path.join('.')}: ${message}`;
    };
    return error.issues.map(issue => describe(issue));
}
|
|
20
|
+
/**
 * Runs `schema.safeParse` on the data and returns the parsed value.
 * On failure, throws a ValidationError carrying the formatted issue messages.
 *
 * @param data - The data to validate
 * @param schema - The Zod schema to validate against
 * @param errorMessage - Message for the thrown error (default: 'Validation failed')
 * @returns The validated data as produced by the schema
 * @throws ValidationError if validation fails
 *
 * @example
 * ```typescript
 * const entities = validateWithSchema(
 *   input,
 *   BatchCreateEntitiesSchema,
 *   'Invalid entity data'
 * );
 * ```
 */
export function validateWithSchema(data, schema, errorMessage = 'Validation failed') {
    const outcome = schema.safeParse(data);
    if (outcome.success) {
        return outcome.data;
    }
    throw new ValidationError(errorMessage, formatZodErrors(outcome.error));
}
|
|
47
|
+
/**
 * Non-throwing variant of schema validation: reports the outcome as a
 * result object so callers can branch on it.
 *
 * @param data - The data to validate
 * @param schema - The Zod schema to validate against
 * @returns `{ success: true, data }` when valid, otherwise
 *          `{ success: false, errors }` with formatted issue messages
 *
 * @example
 * ```typescript
 * const result = validateSafe(input, EntitySchema);
 * if (result.success) {
 *   console.log(result.data);
 * } else {
 *   console.error(result.errors);
 * }
 * ```
 */
export function validateSafe(data, schema) {
    const outcome = schema.safeParse(data);
    if (!outcome.success) {
        return { success: false, errors: formatZodErrors(outcome.error) };
    }
    return { success: true, data: outcome.data };
}
|
|
72
|
+
/**
 * Validates every item of an array against a schema.
 *
 * All items are checked before anything is thrown, so the error lists the
 * problems of every failing item, each prefixed with the item's index in
 * square brackets (e.g. "[2] name: Entity name cannot be empty").
 *
 * @param items - Array of items to validate
 * @param schema - Zod schema for individual items
 * @param errorMessage - Message for the thrown error (default: 'Array validation failed')
 * @returns Array of validated items, in input order
 * @throws ValidationError if any item fails validation
 */
export function validateArrayWithSchema(items, schema, errorMessage = 'Array validation failed') {
    const failures = [];
    const accepted = [];
    for (let index = 0; index < items.length; index += 1) {
        const outcome = schema.safeParse(items[index]);
        if (!outcome.success) {
            for (const detail of formatZodErrors(outcome.error)) {
                failures.push(`[${index}] ${detail}`);
            }
            continue;
        }
        accepted.push(outcome.data);
    }
    if (failures.length === 0) {
        return accepted;
    }
    throw new ValidationError(errorMessage, failures);
}
|