@danielsimonjr/memory-mcp 0.7.2 → 0.47.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/dist/__tests__/edge-cases/edge-cases.test.js +406 -0
  2. package/dist/__tests__/file-path.test.js +5 -5
  3. package/dist/__tests__/integration/workflows.test.js +449 -0
  4. package/dist/__tests__/knowledge-graph.test.js +8 -3
  5. package/dist/__tests__/performance/benchmarks.test.js +413 -0
  6. package/dist/__tests__/unit/core/EntityManager.test.js +334 -0
  7. package/dist/__tests__/unit/core/GraphStorage.test.js +205 -0
  8. package/dist/__tests__/unit/core/RelationManager.test.js +274 -0
  9. package/dist/__tests__/unit/features/CompressionManager.test.js +350 -0
  10. package/dist/__tests__/unit/search/BasicSearch.test.js +311 -0
  11. package/dist/__tests__/unit/search/BooleanSearch.test.js +432 -0
  12. package/dist/__tests__/unit/search/FuzzySearch.test.js +448 -0
  13. package/dist/__tests__/unit/search/RankedSearch.test.js +379 -0
  14. package/dist/__tests__/unit/utils/levenshtein.test.js +77 -0
  15. package/dist/core/EntityManager.js +554 -0
  16. package/dist/core/GraphStorage.js +172 -0
  17. package/dist/core/KnowledgeGraphManager.js +423 -0
  18. package/dist/core/ObservationManager.js +129 -0
  19. package/dist/core/RelationManager.js +186 -0
  20. package/dist/core/TransactionManager.js +389 -0
  21. package/dist/core/index.js +9 -0
  22. package/dist/features/AnalyticsManager.js +222 -0
  23. package/dist/features/ArchiveManager.js +74 -0
  24. package/dist/features/BackupManager.js +311 -0
  25. package/dist/features/CompressionManager.js +291 -0
  26. package/dist/features/ExportManager.js +305 -0
  27. package/dist/features/HierarchyManager.js +219 -0
  28. package/dist/features/ImportExportManager.js +50 -0
  29. package/dist/features/ImportManager.js +328 -0
  30. package/dist/features/TagManager.js +210 -0
  31. package/dist/features/index.js +12 -0
  32. package/dist/index.js +13 -996
  33. package/dist/memory.jsonl +18 -0
  34. package/dist/search/BasicSearch.js +131 -0
  35. package/dist/search/BooleanSearch.js +283 -0
  36. package/dist/search/FuzzySearch.js +96 -0
  37. package/dist/search/RankedSearch.js +190 -0
  38. package/dist/search/SavedSearchManager.js +145 -0
  39. package/dist/search/SearchFilterChain.js +187 -0
  40. package/dist/search/SearchManager.js +305 -0
  41. package/dist/search/SearchSuggestions.js +57 -0
  42. package/dist/search/TFIDFIndexManager.js +217 -0
  43. package/dist/search/index.js +14 -0
  44. package/dist/server/MCPServer.js +52 -0
  45. package/dist/server/toolDefinitions.js +732 -0
  46. package/dist/server/toolHandlers.js +117 -0
  47. package/dist/types/analytics.types.js +6 -0
  48. package/dist/types/entity.types.js +7 -0
  49. package/dist/types/import-export.types.js +7 -0
  50. package/dist/types/index.js +12 -0
  51. package/dist/types/search.types.js +7 -0
  52. package/dist/types/tag.types.js +6 -0
  53. package/dist/utils/constants.js +128 -0
  54. package/dist/utils/dateUtils.js +89 -0
  55. package/dist/utils/entityUtils.js +108 -0
  56. package/dist/utils/errors.js +121 -0
  57. package/dist/utils/filterUtils.js +155 -0
  58. package/dist/utils/index.js +39 -0
  59. package/dist/utils/levenshtein.js +62 -0
  60. package/dist/utils/logger.js +33 -0
  61. package/dist/utils/paginationUtils.js +81 -0
  62. package/dist/utils/pathUtils.js +115 -0
  63. package/dist/utils/responseFormatter.js +55 -0
  64. package/dist/utils/schemas.js +184 -0
  65. package/dist/utils/searchCache.js +209 -0
  66. package/dist/utils/tagUtils.js +107 -0
  67. package/dist/utils/tfidf.js +90 -0
  68. package/dist/utils/validationHelper.js +99 -0
  69. package/dist/utils/validationUtils.js +109 -0
  70. package/package.json +82 -48
@@ -0,0 +1,184 @@
1
+ /**
2
+ * Validation Schemas
3
+ *
4
+ * Zod schemas for input validation across the memory system.
5
+ * Provides runtime type safety and data validation.
6
+ *
7
+ * @module utils/schemas
8
+ */
9
+ import { z } from 'zod';
10
+ import { IMPORTANCE_RANGE } from './constants.js';
11
/**
 * Importance bounds, read from the centralized IMPORTANCE_RANGE constants.
 */
const MIN_IMPORTANCE = IMPORTANCE_RANGE.MIN;
const MAX_IMPORTANCE = IMPORTANCE_RANGE.MAX;
/**
 * ISO 8601 date-time string validation.
 * Example of the expected shape: YYYY-MM-DDTHH:mm:ss.sssZ
 */
const isoDateSchema = z.string().datetime({ message: 'Must be a valid ISO 8601 date string' });
/**
 * Entity name validation.
 * The value is trimmed BEFORE the length checks run, so a whitespace-only
 * name is rejected as empty instead of being accepted and trimmed to ''.
 */
const entityNameSchema = z.string()
    .trim()
    .min(1, 'Entity name cannot be empty')
    .max(500, 'Entity name cannot exceed 500 characters');
/**
 * Entity type validation (e.g., "person", "project", "concept").
 * Trimmed first so that whitespace-only types count as empty.
 */
const entityTypeSchema = z.string()
    .trim()
    .min(1, 'Entity type cannot be empty')
    .max(100, 'Entity type cannot exceed 100 characters');
/**
 * Observation validation.
 * Each observation must be a non-empty string of at most 5000 characters.
 * Observations are intentionally not trimmed: their text is kept verbatim.
 */
const observationSchema = z.string()
    .min(1, 'Observation cannot be empty')
    .max(5000, 'Observation cannot exceed 5000 characters');
/**
 * Tag validation.
 * Tags are trimmed and lowercased first; the length checks then apply to the
 * normalized value, so a whitespace-only tag is rejected as empty.
 */
const tagSchema = z.string()
    .trim()
    .toLowerCase()
    .min(1, 'Tag cannot be empty')
    .max(100, 'Tag cannot exceed 100 characters');
/**
 * Importance validation.
 * Must be an integer between MIN_IMPORTANCE and MAX_IMPORTANCE (inclusive).
 */
const importanceSchema = z.number()
    .int('Importance must be an integer')
    .min(MIN_IMPORTANCE, `Importance must be at least ${MIN_IMPORTANCE}`)
    .max(MAX_IMPORTANCE, `Importance must be at most ${MAX_IMPORTANCE}`);
/**
 * Relation type validation.
 * Conventionally snake_case (e.g., "works_at", "manages"); the format itself
 * is not enforced here, only non-emptiness (after trimming) and length.
 */
const relationTypeSchema = z.string()
    .trim()
    .min(1, 'Relation type cannot be empty')
    .max(100, 'Relation type cannot exceed 100 characters');
69
/**
 * Optional timestamp fields shared by the entity schemas.
 */
const entityTimestampFields = {
    createdAt: isoDateSchema.optional(),
    lastModified: isoDateSchema.optional(),
};
/**
 * Optional descriptive fields shared by the entity schemas.
 */
const entityMetadataFields = {
    tags: z.array(tagSchema).optional(),
    importance: importanceSchema.optional(),
    parentId: entityNameSchema.optional(),
};
/**
 * Schema for a complete entity object, timestamps included.
 * Strict: unknown keys are rejected.
 */
export const EntitySchema = z.object({
    name: entityNameSchema,
    entityType: entityTypeSchema,
    observations: z.array(observationSchema),
    ...entityTimestampFields,
    ...entityMetadataFields,
}).strict();
/**
 * Schema for the input accepted when creating an entity.
 * Timestamps may be supplied but are optional.
 */
export const CreateEntitySchema = z.object({
    name: entityNameSchema,
    entityType: entityTypeSchema,
    observations: z.array(observationSchema),
    ...entityMetadataFields,
    ...entityTimestampFields,
});
/**
 * Schema for a partial entity update.
 * Every field is optional; `name` is deliberately absent because it is the
 * entity's unique identifier.
 */
export const UpdateEntitySchema = z.object({
    entityType: entityTypeSchema.optional(),
    observations: z.array(observationSchema).optional(),
    ...entityMetadataFields,
});
110
/**
 * Fields common to both relation schemas: the two endpoint entity names,
 * the relation type, and optional timestamps.
 */
const relationFields = {
    from: entityNameSchema,
    to: entityNameSchema,
    relationType: relationTypeSchema,
    createdAt: isoDateSchema.optional(),
    lastModified: isoDateSchema.optional(),
};
/**
 * Schema for a complete relation object, timestamps included.
 * Strict: unknown keys are rejected.
 */
export const RelationSchema = z.object({ ...relationFields }).strict();
/**
 * Schema for the input accepted when creating a relation.
 * Timestamps may be supplied but are optional.
 */
export const CreateRelationSchema = z.object({ ...relationFields });
133
/**
 * Search query validation.
 * The query is trimmed BEFORE the length checks, so a whitespace-only query
 * is rejected as empty rather than accepted and reduced to ''. The
 * 1000-character limit applies to the trimmed text.
 */
export const SearchQuerySchema = z.string()
    .trim()
    .min(1, 'Search query cannot be empty')
    .max(1000, 'Search query cannot exceed 1000 characters');
141
/**
 * True when the range's start is not later than its end.
 */
const isChronological = (range) => new Date(range.start) <= new Date(range.end);
/**
 * Date range used by search filters: two ISO date strings with start <= end.
 */
export const DateRangeSchema = z
    .object({
        start: isoDateSchema,
        end: isoDateSchema,
    })
    .refine(isChronological, { message: 'Start date must be before or equal to end date' });
/**
 * Non-empty list of alias tags.
 */
const aliasListSchema = z.array(tagSchema).min(1, 'Must have at least one alias');
/**
 * Tag alias definition for TagManager: one canonical tag plus its aliases.
 */
export const TagAliasSchema = z.object({
    canonical: tagSchema,
    aliases: aliasListSchema,
});
/**
 * Export formats accepted by this schema.
 */
const EXPORT_FORMATS = ['json', 'graphml', 'csv'];
export const ExportFormatSchema = z.enum(EXPORT_FORMATS);
159
/**
 * Batch entity creation: up to 1000 entities per call.
 * An empty array is valid (no-op).
 */
const entityBatch = z.array(CreateEntitySchema);
export const BatchCreateEntitiesSchema = entityBatch
    .max(1000, 'Cannot create more than 1000 entities in a single batch');
/**
 * Batch relation creation: up to 1000 relations per call.
 * An empty array is valid (no-op).
 */
const relationBatch = z.array(CreateRelationSchema);
export const BatchCreateRelationsSchema = relationBatch
    .max(1000, 'Cannot create more than 1000 relations in a single batch');
/**
 * Entity names for batch deletion: between 1 and 1000 names.
 */
const entityNameList = z.array(entityNameSchema);
export const EntityNamesSchema = entityNameList
    .min(1, 'Must specify at least one entity name')
    .max(1000, 'Cannot delete more than 1000 entities in a single batch');
/**
 * Relations for batch deletion: between 1 and 1000 relations, each validated
 * with the relation-creation schema.
 */
const relationDeletionList = z.array(CreateRelationSchema);
export const DeleteRelationsSchema = relationDeletionList
    .min(1, 'Must specify at least one relation')
    .max(1000, 'Cannot delete more than 1000 relations in a single batch');
@@ -0,0 +1,209 @@
1
+ /**
2
+ * Search Result Cache
3
+ *
4
+ * Simple LRU-style cache for search results with TTL support.
5
+ * Improves performance for repeated queries without external dependencies.
6
+ *
7
+ * @module utils/searchCache
8
+ */
9
/**
 * LRU cache for search results with TTL-based expiration.
 *
 * - Holds at most `maxSize` entries; storing a new key at capacity evicts the
 *   least recently used key.
 * - Each entry expires `ttlMs` milliseconds after it was stored.
 * - Counts hits and misses for statistics.
 * - Keys are strings derived from the query-parameter object.
 */
export class SearchCache {
    /** Maximum number of entries kept before the LRU entry is evicted. */
    maxSize;
    /** Lifetime of an entry in milliseconds. */
    ttlMs;
    /** key -> { value, timestamp, expiresAt } */
    cache = new Map();
    /** Keys ordered from least recently used (front) to most recently used (back). */
    accessOrder = [];
    hits = 0;
    misses = 0;
    /**
     * @param maxSize - Maximum number of cached entries (default 500)
     * @param ttlMs - Entry lifetime in milliseconds (default 5 minutes)
     */
    constructor(maxSize = 500, ttlMs = 5 * 60 * 1000) {
        this.maxSize = maxSize;
        this.ttlMs = ttlMs;
    }
    /**
     * Generate cache key from query parameters.
     *
     * Top-level keys are sorted so that property order does not affect the
     * key. Values are serialized with JSON.stringify as-is, so the key order
     * of nested objects is not normalized. Empty params produce the key ''.
     *
     * @param params - Query parameters object
     * @returns String key
     */
    generateKey(params) {
        return Object.keys(params)
            .sort()
            .map(key => `${key}:${JSON.stringify(params[key])}`)
            .join('|');
    }
    /**
     * Get value from cache.
     *
     * @param params - Query parameters to generate cache key
     * @returns Cached value or undefined if not found/expired
     */
    get(params) {
        const key = this.generateKey(params);
        const entry = this.cache.get(key);
        if (!entry) {
            this.misses++;
            return undefined;
        }
        // An expired entry is dropped and counted as a miss.
        if (Date.now() > entry.expiresAt) {
            this.cache.delete(key);
            this.removeFromAccessOrder(key);
            this.misses++;
            return undefined;
        }
        // Move to the back of the order: most recently used.
        this.removeFromAccessOrder(key);
        this.accessOrder.push(key);
        this.hits++;
        return entry.value;
    }
    /**
     * Set value in cache, evicting the least recently used entry when a new
     * key is stored at capacity.
     *
     * @param params - Query parameters to generate cache key
     * @param value - Value to cache
     */
    set(params, value) {
        const key = this.generateKey(params);
        if (this.cache.has(key)) {
            // Overwriting an existing key: only its position in the order changes.
            this.removeFromAccessOrder(key);
        }
        else if (this.cache.size >= this.maxSize) {
            const lruKey = this.accessOrder.shift();
            // Compare against undefined, not truthiness: '' is a valid key
            // (generated from empty params) and must be evictable too.
            if (lruKey !== undefined) {
                this.cache.delete(lruKey);
            }
        }
        // Read the clock once so timestamp and expiresAt are consistent.
        const now = Date.now();
        this.cache.set(key, {
            value,
            timestamp: now,
            expiresAt: now + this.ttlMs,
        });
        this.accessOrder.push(key);
    }
    /**
     * Invalidate all cached entries. Hit/miss counters are left untouched.
     */
    clear() {
        this.cache.clear();
        this.accessOrder = [];
    }
    /**
     * Remove specific entry from access order (no-op if the key is absent).
     */
    removeFromAccessOrder(key) {
        const index = this.accessOrder.indexOf(key);
        if (index > -1) {
            this.accessOrder.splice(index, 1);
        }
    }
    /**
     * Get cache statistics.
     *
     * @returns hits, misses, current size, and hit rate (0 when nothing was looked up)
     */
    getStats() {
        const total = this.hits + this.misses;
        return {
            hits: this.hits,
            misses: this.misses,
            size: this.cache.size,
            hitRate: total > 0 ? this.hits / total : 0,
        };
    }
    /**
     * Reset cache statistics.
     */
    resetStats() {
        this.hits = 0;
        this.misses = 0;
    }
    /**
     * Clean up expired entries.
     *
     * Should be called periodically to prevent memory buildup.
     */
    cleanupExpired() {
        const now = Date.now();
        // Collect first, delete afterwards, to avoid mutating the map mid-iteration.
        const keysToDelete = [];
        for (const [key, entry] of this.cache.entries()) {
            if (now > entry.expiresAt) {
                keysToDelete.push(key);
            }
        }
        for (const key of keysToDelete) {
            this.cache.delete(key);
            this.removeFromAccessOrder(key);
        }
    }
    /**
     * Get current cache size.
     */
    get size() {
        return this.cache.size;
    }
    /**
     * Check if cache has a live entry for params.
     * An expired entry is removed as a side effect. Does not affect the
     * hit/miss counters or the LRU order.
     */
    has(params) {
        const key = this.generateKey(params);
        const entry = this.cache.get(key);
        if (!entry)
            return false;
        if (Date.now() > entry.expiresAt) {
            this.cache.delete(key);
            this.removeFromAccessOrder(key);
            return false;
        }
        return true;
    }
}
170
/**
 * Module-wide search caches, one independent SearchCache (default settings)
 * per search type: basic, ranked, boolean and fuzzy.
 */
export const searchCaches = Object.fromEntries(
    ['basic', 'ranked', 'boolean', 'fuzzy'].map(kind => [kind, new SearchCache()])
);
179
/**
 * Empty every search cache (basic, ranked, boolean, fuzzy).
 *
 * Intended to be called after the graph is modified so that stale search
 * results are not served.
 */
export function clearAllSearchCaches() {
    for (const kind of ['basic', 'ranked', 'boolean', 'fuzzy']) {
        searchCaches[kind].clear();
    }
}
190
/**
 * Collect the statistics of every search cache.
 *
 * @returns Object keyed by search type (basic, ranked, boolean, fuzzy) whose
 *          values are the corresponding cache's getStats() result
 */
export function getAllCacheStats() {
    const kinds = ['basic', 'ranked', 'boolean', 'fuzzy'];
    return Object.fromEntries(kinds.map(kind => [kind, searchCaches[kind].getStats()]));
}
201
/**
 * Drop the expired entries from every search cache
 * (basic, ranked, boolean, fuzzy).
 */
export function cleanupAllCaches() {
    for (const kind of ['basic', 'ranked', 'boolean', 'fuzzy']) {
        searchCaches[kind].cleanupExpired();
    }
}
@@ -0,0 +1,107 @@
1
+ /**
2
+ * Tag Normalization and Matching Utilities
3
+ *
4
+ * Centralizes tag operations to eliminate duplicate normalization logic
5
+ * across the codebase. All tags are normalized to lowercase for consistent matching.
6
+ */
7
/**
 * Returns the canonical form of a single tag: lowercased, with leading and
 * trailing whitespace removed.
 *
 * @param tag - Tag to normalize
 * @returns Normalized tag
 */
export function normalizeTag(tag) {
    const lowered = tag.toLowerCase();
    return lowered.trim();
}
16
/**
 * Lowercases every tag in an array.
 * Note: unlike normalizeTag, the tags are NOT trimmed here.
 *
 * @param tags - Array of tags to normalize, or undefined/null
 * @returns New array of lowercased tags; empty array when the input is
 *          undefined, null or empty
 */
export function normalizeTags(tags) {
    const hasTags = Boolean(tags) && tags.length !== 0;
    return hasTags ? tags.map(tag => tag.toLowerCase()) : [];
}
28
/**
 * Tells whether an entity carries at least one of the searched tags.
 * Both tag lists are normalized (via normalizeTags) before comparison.
 *
 * @param entityTags - Tags on the entity (may be undefined)
 * @param searchTags - Tags to search for (may be undefined)
 * @returns true if any search tag matches any entity tag; false if nothing
 *          matches or either list is missing/empty
 */
export function hasMatchingTag(entityTags, searchTags) {
    const isMissing = (list) => !list || list.length === 0;
    if (isMissing(entityTags) || isMissing(searchTags)) {
        return false;
    }
    const entityTagSet = new Set(normalizeTags(entityTags));
    const wanted = normalizeTags(searchTags);
    return wanted.some(tag => entityTagSet.has(tag));
}
45
/**
 * Tells whether an entity carries every one of the required tags.
 * Comparison is done on the normalized (normalizeTags) form of both lists.
 *
 * @param entityTags - Tags on the entity (may be undefined)
 * @param requiredTags - All tags that must be present
 * @returns false when the entity has no tags; otherwise true when every
 *          required tag is present (trivially true for an empty requirement list)
 */
export function hasAllTags(entityTags, requiredTags) {
    const entityHasTags = Boolean(entityTags) && entityTags.length !== 0;
    if (!entityHasTags) {
        return false;
    }
    if (requiredTags.length === 0) {
        return true;
    }
    const present = new Set(normalizeTags(entityTags));
    for (const required of normalizeTags(requiredTags)) {
        if (!present.has(required)) {
            return false;
        }
    }
    return true;
}
60
/**
 * Keeps only the entities that carry at least one of the searched tags.
 * When searchTags is empty or undefined the input array itself is returned
 * unfiltered.
 *
 * @param entities - Array of entities with optional tags property
 * @param searchTags - Tags to filter by
 * @returns Entities having at least one matching tag (compared in normalized form)
 */
export function filterByTags(entities, searchTags) {
    const noFilter = !searchTags || searchTags.length === 0;
    if (noFilter) {
        return entities;
    }
    const wanted = normalizeTags(searchTags);
    return entities.filter(entity => {
        const ownTags = entity.tags;
        // Untagged entities can never match.
        if (!ownTags || ownTags.length === 0) {
            return false;
        }
        const ownTagSet = new Set(normalizeTags(ownTags));
        return wanted.some(tag => ownTagSet.has(tag));
    });
}
80
/**
 * Adds new tags to an existing tag array, avoiding duplicates.
 * All tags are normalized (via normalizeTags) before being combined.
 *
 * Duplicates are removed across the whole result: repeated entries inside
 * `newTags` (or inside `existingTags`) appear only once. Order is preserved:
 * existing tags first, then new tags in the order first seen.
 *
 * @param existingTags - Current tags (may be undefined)
 * @param newTags - Tags to add (may be undefined)
 * @returns Combined normalized tags array with no duplicates
 */
export function addUniqueTags(existingTags, newTags) {
    // A Set keeps first-insertion order, so it de-duplicates without reordering.
    return [...new Set([...normalizeTags(existingTags), ...normalizeTags(newTags)])];
}
94
/**
 * Returns the existing tags minus the ones listed for removal.
 * Matching ignores case; the surviving tags keep their original casing.
 *
 * @param existingTags - Current tags (may be undefined)
 * @param tagsToRemove - Tags to remove
 * @returns New array without the removed tags (empty when there are no existing tags)
 */
export function removeTags(existingTags, tagsToRemove) {
    const nothingToFilter = !existingTags || existingTags.length === 0;
    if (nothingToFilter) {
        return [];
    }
    const blocked = new Set(normalizeTags(tagsToRemove));
    return existingTags.filter(tag => !blocked.has(tag.toLowerCase()));
}
@@ -0,0 +1,90 @@
1
+ /**
2
+ * TF-IDF (Term Frequency-Inverse Document Frequency) Utilities
3
+ *
4
+ * Algorithms for calculating TF-IDF scores used in ranked search.
5
+ * TF-IDF measures how important a term is to a document in a collection.
6
+ *
7
+ * @module utils/tfidf
8
+ */
9
/**
 * Term Frequency (TF) of a term within one document.
 *
 * TF = occurrences of the term / total number of tokens in the document
 *
 * The term is lowercased and compared against the document's tokens
 * (as produced by tokenize).
 *
 * @param term - The search term
 * @param document - The document text
 * @returns Term frequency (0.0 to 1.0); 0 for a document without tokens
 */
export function calculateTF(term, document) {
    const needle = term.toLowerCase();
    const words = tokenize(document);
    if (words.length === 0) {
        return 0;
    }
    let occurrences = 0;
    for (const word of words) {
        if (word === needle) {
            occurrences += 1;
        }
    }
    return occurrences / words.length;
}
26
/**
 * Inverse Document Frequency (IDF) of a term across a document collection.
 *
 * IDF = ln(total documents / documents containing the term)
 *
 * @param term - The search term
 * @param documents - Array of document texts
 * @returns Inverse document frequency; 0 when the collection is empty or no
 *          document contains the term
 */
export function calculateIDF(term, documents) {
    const total = documents.length;
    if (total === 0) {
        return 0;
    }
    const needle = term.toLowerCase();
    let containing = 0;
    for (const doc of documents) {
        if (tokenize(doc).includes(needle)) {
            containing += 1;
        }
    }
    return containing === 0 ? 0 : Math.log(total / containing);
}
44
/**
 * TF-IDF score of a term for one document of a collection.
 *
 * TF-IDF = TF(term, document) * IDF(term, documents)
 *
 * A higher score means the term is more characteristic of the document.
 *
 * @param term - The search term
 * @param document - The document text
 * @param documents - Array of all documents
 * @returns TF-IDF score
 */
export function calculateTFIDF(term, document, documents) {
    return calculateTF(term, document) * calculateIDF(term, documents);
}
61
/**
 * Tokenize text into lowercase words.
 *
 * Every character that is not a letter, combining mark, digit, underscore or
 * whitespace is replaced by a space, then the text is split on whitespace.
 * Letters and digits are matched with Unicode property escapes, so words
 * containing non-ASCII characters (e.g. "café", "zürich") stay intact rather
 * than being split at the accented letter.
 *
 * @param text - Text to tokenize
 * @returns Array of lowercase tokens (empty when the text has no word characters)
 */
export function tokenize(text) {
    return text
        .toLowerCase()
        .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
        .split(/\s+/)
        .filter(token => token.length > 0);
}
76
/**
 * TF-IDF scores for several search terms against the same document.
 *
 * @param terms - Array of search terms
 * @param document - The document text
 * @param documents - Array of all documents
 * @returns Map from each term to its TF-IDF score, in the order the terms were given
 */
export function calculateMultiTermTFIDF(terms, document, documents) {
    const scoredTerms = Array.from(terms, term => [term, calculateTFIDF(term, document, documents)]);
    return new Map(scoredTerms);
}
@@ -0,0 +1,99 @@
1
+ /**
2
+ * Zod Schema Validation Helper
3
+ *
4
+ * Centralizes Zod validation patterns to eliminate redundant error formatting
5
+ * and validation logic across the codebase.
6
+ */
7
+ import { ValidationError } from './errors.js';
8
/**
 * Turns the issues of a Zod error into human-readable strings.
 * Each message is prefixed with the dotted path of the offending field
 * ("a.0.b: message") when the issue has a non-empty path.
 *
 * @param error - Zod error object (its `issues` array is read)
 * @returns Array of formatted error messages, one per issue
 */
export function formatZodErrors(error) {
    return error.issues.map(({ path, message }) => (path.length > 0
        ? `${path.join('.')}: ${message}`
        : `${message}`));
}
20
/**
 * Runs `schema.safeParse` on the data and returns the parsed result.
 * On failure, throws a ValidationError built from the given message and the
 * formatted list of Zod issues.
 *
 * @param data - The data to validate
 * @param schema - The Zod schema to validate against
 * @param errorMessage - Custom error message prefix (default: 'Validation failed')
 * @returns The validated data as produced by the schema
 * @throws ValidationError if validation fails
 *
 * @example
 * ```typescript
 * const entities = validateWithSchema(
 *   input,
 *   BatchCreateEntitiesSchema,
 *   'Invalid entity data'
 * );
 * ```
 */
export function validateWithSchema(data, schema, errorMessage = 'Validation failed') {
    const outcome = schema.safeParse(data);
    if (outcome.success) {
        return outcome.data;
    }
    throw new ValidationError(errorMessage, formatZodErrors(outcome.error));
}
47
/**
 * Non-throwing validation: reports the outcome as a result object so the
 * caller can decide how to handle failures.
 *
 * @param data - The data to validate
 * @param schema - The Zod schema to validate against
 * @returns `{ success: true, data }` when valid, otherwise
 *          `{ success: false, errors }` with the formatted issue messages
 *
 * @example
 * ```typescript
 * const result = validateSafe(input, EntitySchema);
 * if (result.success) {
 *   console.log(result.data);
 * } else {
 *   console.error(result.errors);
 * }
 * ```
 */
export function validateSafe(data, schema) {
    const outcome = schema.safeParse(data);
    return outcome.success
        ? { success: true, data: outcome.data }
        : { success: false, errors: formatZodErrors(outcome.error) };
}
72
/**
 * Validates each item of an array against a schema, collecting the failures
 * of all items before reporting. Every error message is prefixed with the
 * index of the item it belongs to, e.g. "[2] name: Entity name cannot be empty".
 *
 * @param items - Array of items to validate
 * @param schema - Zod schema for individual items
 * @param errorMessage - Custom error message prefix (default: 'Array validation failed')
 * @returns Array of validated items
 * @throws ValidationError if any item fails validation
 */
export function validateArrayWithSchema(items, schema, errorMessage = 'Array validation failed') {
    const failures = [];
    const accepted = [];
    for (let index = 0; index < items.length; index++) {
        const outcome = schema.safeParse(items[index]);
        if (!outcome.success) {
            for (const text of formatZodErrors(outcome.error)) {
                failures.push(`[${index}] ${text}`);
            }
            continue;
        }
        accepted.push(outcome.data);
    }
    if (failures.length > 0) {
        throw new ValidationError(errorMessage, failures);
    }
    return accepted;
}