@twelvehart/supermemory-runtime 1.0.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/.env.example +57 -0
  2. package/README.md +374 -0
  3. package/dist/index.js +189 -0
  4. package/dist/mcp/index.js +1132 -0
  5. package/docker-compose.prod.yml +91 -0
  6. package/docker-compose.yml +358 -0
  7. package/drizzle/0000_dapper_the_professor.sql +159 -0
  8. package/drizzle/0001_api_keys.sql +51 -0
  9. package/drizzle/meta/0000_snapshot.json +1532 -0
  10. package/drizzle/meta/_journal.json +13 -0
  11. package/drizzle.config.ts +20 -0
  12. package/package.json +114 -0
  13. package/scripts/add-extraction-job.ts +122 -0
  14. package/scripts/benchmark-pgvector.ts +122 -0
  15. package/scripts/bootstrap.sh +209 -0
  16. package/scripts/check-runtime-pack.ts +111 -0
  17. package/scripts/claude-mcp-config.ts +336 -0
  18. package/scripts/docker-entrypoint.sh +183 -0
  19. package/scripts/doctor.ts +377 -0
  20. package/scripts/init-db.sql +33 -0
  21. package/scripts/install.sh +1110 -0
  22. package/scripts/mcp-setup.ts +271 -0
  23. package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
  24. package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
  25. package/scripts/migrations/003_create_hnsw_index.sql +94 -0
  26. package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
  27. package/scripts/migrations/005_create_chunks_table.sql +95 -0
  28. package/scripts/migrations/006_create_processing_queue.sql +45 -0
  29. package/scripts/migrations/generate_test_data.sql +42 -0
  30. package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
  31. package/scripts/migrations/run_migrations.sh +286 -0
  32. package/scripts/migrations/test_hnsw_index.sql +255 -0
  33. package/scripts/pre-commit-secrets +282 -0
  34. package/scripts/run-extraction-worker.ts +46 -0
  35. package/scripts/run-phase1-tests.sh +291 -0
  36. package/scripts/setup.ts +222 -0
  37. package/scripts/smoke-install.sh +12 -0
  38. package/scripts/test-health-endpoint.sh +328 -0
  39. package/src/api/index.ts +2 -0
  40. package/src/api/middleware/auth.ts +80 -0
  41. package/src/api/middleware/csrf.ts +308 -0
  42. package/src/api/middleware/errorHandler.ts +166 -0
  43. package/src/api/middleware/rateLimit.ts +360 -0
  44. package/src/api/middleware/validation.ts +514 -0
  45. package/src/api/routes/documents.ts +286 -0
  46. package/src/api/routes/profiles.ts +237 -0
  47. package/src/api/routes/search.ts +71 -0
  48. package/src/api/stores/index.ts +58 -0
  49. package/src/config/bootstrap-env.ts +3 -0
  50. package/src/config/env.ts +71 -0
  51. package/src/config/feature-flags.ts +25 -0
  52. package/src/config/index.ts +140 -0
  53. package/src/config/secrets.config.ts +291 -0
  54. package/src/db/client.ts +92 -0
  55. package/src/db/index.ts +73 -0
  56. package/src/db/postgres.ts +72 -0
  57. package/src/db/schema/chunks.schema.ts +31 -0
  58. package/src/db/schema/containers.schema.ts +46 -0
  59. package/src/db/schema/documents.schema.ts +49 -0
  60. package/src/db/schema/embeddings.schema.ts +32 -0
  61. package/src/db/schema/index.ts +11 -0
  62. package/src/db/schema/memories.schema.ts +72 -0
  63. package/src/db/schema/profiles.schema.ts +34 -0
  64. package/src/db/schema/queue.schema.ts +59 -0
  65. package/src/db/schema/relationships.schema.ts +42 -0
  66. package/src/db/schema.ts +223 -0
  67. package/src/db/worker-connection.ts +47 -0
  68. package/src/index.ts +235 -0
  69. package/src/mcp/CLAUDE.md +1 -0
  70. package/src/mcp/index.ts +1380 -0
  71. package/src/mcp/legacyState.ts +22 -0
  72. package/src/mcp/rateLimit.ts +358 -0
  73. package/src/mcp/resources.ts +309 -0
  74. package/src/mcp/results.ts +104 -0
  75. package/src/mcp/tools.ts +401 -0
  76. package/src/queues/config.ts +119 -0
  77. package/src/queues/index.ts +289 -0
  78. package/src/sdk/client.ts +225 -0
  79. package/src/sdk/errors.ts +266 -0
  80. package/src/sdk/http.ts +560 -0
  81. package/src/sdk/index.ts +244 -0
  82. package/src/sdk/resources/base.ts +65 -0
  83. package/src/sdk/resources/connections.ts +204 -0
  84. package/src/sdk/resources/documents.ts +163 -0
  85. package/src/sdk/resources/index.ts +10 -0
  86. package/src/sdk/resources/memories.ts +150 -0
  87. package/src/sdk/resources/search.ts +60 -0
  88. package/src/sdk/resources/settings.ts +36 -0
  89. package/src/sdk/types.ts +674 -0
  90. package/src/services/chunking/index.ts +451 -0
  91. package/src/services/chunking.service.ts +650 -0
  92. package/src/services/csrf.service.ts +252 -0
  93. package/src/services/documents.repository.ts +219 -0
  94. package/src/services/documents.service.ts +191 -0
  95. package/src/services/embedding.service.ts +404 -0
  96. package/src/services/extraction.service.ts +300 -0
  97. package/src/services/extractors/code.extractor.ts +451 -0
  98. package/src/services/extractors/index.ts +9 -0
  99. package/src/services/extractors/markdown.extractor.ts +461 -0
  100. package/src/services/extractors/pdf.extractor.ts +315 -0
  101. package/src/services/extractors/text.extractor.ts +118 -0
  102. package/src/services/extractors/url.extractor.ts +243 -0
  103. package/src/services/index.ts +235 -0
  104. package/src/services/ingestion.service.ts +177 -0
  105. package/src/services/llm/anthropic.ts +400 -0
  106. package/src/services/llm/base.ts +460 -0
  107. package/src/services/llm/contradiction-detector.service.ts +526 -0
  108. package/src/services/llm/heuristics.ts +148 -0
  109. package/src/services/llm/index.ts +309 -0
  110. package/src/services/llm/memory-classifier.service.ts +383 -0
  111. package/src/services/llm/memory-extension-detector.service.ts +523 -0
  112. package/src/services/llm/mock.ts +470 -0
  113. package/src/services/llm/openai.ts +398 -0
  114. package/src/services/llm/prompts.ts +438 -0
  115. package/src/services/llm/types.ts +373 -0
  116. package/src/services/memory.repository.ts +1769 -0
  117. package/src/services/memory.service.ts +1338 -0
  118. package/src/services/memory.types.ts +234 -0
  119. package/src/services/persistence/index.ts +295 -0
  120. package/src/services/pipeline.service.ts +509 -0
  121. package/src/services/profile.repository.ts +436 -0
  122. package/src/services/profile.service.ts +560 -0
  123. package/src/services/profile.types.ts +270 -0
  124. package/src/services/relationships/detector.ts +1128 -0
  125. package/src/services/relationships/index.ts +268 -0
  126. package/src/services/relationships/memory-integration.ts +459 -0
  127. package/src/services/relationships/strategies.ts +132 -0
  128. package/src/services/relationships/types.ts +370 -0
  129. package/src/services/search.service.ts +761 -0
  130. package/src/services/search.types.ts +220 -0
  131. package/src/services/secrets.service.ts +384 -0
  132. package/src/services/vectorstore/base.ts +327 -0
  133. package/src/services/vectorstore/index.ts +444 -0
  134. package/src/services/vectorstore/memory.ts +286 -0
  135. package/src/services/vectorstore/migration.ts +295 -0
  136. package/src/services/vectorstore/mock.ts +403 -0
  137. package/src/services/vectorstore/pgvector.ts +695 -0
  138. package/src/services/vectorstore/types.ts +247 -0
  139. package/src/startup.ts +389 -0
  140. package/src/types/api.types.ts +193 -0
  141. package/src/types/document.types.ts +103 -0
  142. package/src/types/index.ts +241 -0
  143. package/src/types/profile.base.ts +133 -0
  144. package/src/utils/errors.ts +447 -0
  145. package/src/utils/id.ts +15 -0
  146. package/src/utils/index.ts +101 -0
  147. package/src/utils/logger.ts +313 -0
  148. package/src/utils/sanitization.ts +501 -0
  149. package/src/utils/secret-validation.ts +273 -0
  150. package/src/utils/synonyms.ts +188 -0
  151. package/src/utils/validation.ts +581 -0
  152. package/src/workers/chunking.worker.ts +242 -0
  153. package/src/workers/embedding.worker.ts +358 -0
  154. package/src/workers/extraction.worker.ts +346 -0
  155. package/src/workers/indexing.worker.ts +505 -0
  156. package/tsconfig.json +38 -0
@@ -0,0 +1,581 @@
1
+ /**
2
+ * Validation Utilities for Supermemory Clone
3
+ *
4
+ * Provides Zod schema validation helpers, common validators, and security-focused schemas.
5
+ *
6
+ * Security Features:
7
+ * - Content size validation (50KB default)
8
+ * - Path traversal prevention
9
+ * - URL protocol whitelisting (http, https only)
10
+ * - XSS-safe string schemas with auto-sanitization
11
+ */
12
+
13
+ import { z, ZodError, ZodSchema } from 'zod'
14
+ import { ValidationError } from './errors.js'
15
+ import { sanitizeHtml, sanitizeForStorage, isPathSafe, sanitizeUrl } from './sanitization.js'
16
+
17
+ // ============================================================================
18
+ // Security Constants
19
+ // ============================================================================
20
+
21
/**
 * Content size ceiling: 50 * 1024 (51,200).
 * validateContentSize compares it against the UTF-8 byte length, while
 * boundedContentSchema passes it to the string `.max()` check.
 */
export const MAX_CONTENT_SIZE = 50 * 1024

/**
 * Search query ceiling: 10 * 1024 (10,240), used by boundedQuerySchema.
 */
export const MAX_QUERY_LENGTH = 10 * 1024

/**
 * Ceiling for the UTF-8 byte length of JSON-serialized metadata: 10 * 1024 (10,240).
 */
export const MAX_METADATA_SIZE = 10 * 1024

/**
 * Longest permitted container tag, in characters.
 */
export const MAX_CONTAINER_TAG_LENGTH = 100

/**
 * URL protocols accepted by the URL validators, written the way `URL.protocol`
 * reports them (with the trailing colon).
 */
export const ALLOWED_URL_PROTOCOLS = ['http:', 'https:']
45
+
46
+ // ============================================================================
47
+ // Common Schemas
48
+ // ============================================================================
49
+
50
/**
 * String with at least one character (whitespace-only strings still pass).
 */
export const nonEmptyString = z.string().min(1, 'Value cannot be empty')

/**
 * String that must be formatted as a UUID.
 */
export const uuidSchema = z.string().uuid('Invalid UUID format')

/**
 * Integer strictly greater than zero.
 */
export const positiveInt = z.number().int().positive()

/**
 * Integer greater than or equal to zero.
 */
export const nonNegativeInt = z.number().int().nonnegative()

/**
 * Confidence score: a number in the inclusive range [0, 1].
 */
export const confidenceScore = z
  .number()
  .min(0, 'Confidence must be at least 0')
  .max(1, 'Confidence must be at most 1')
74
+
75
/**
 * Container tag schema.
 *
 * Accepts 1 to MAX_CONTAINER_TAG_LENGTH characters drawn from ASCII letters,
 * digits, underscores and hyphens. The length limit and its error message are
 * both derived from MAX_CONTAINER_TAG_LENGTH so they cannot drift apart from
 * the exported constant.
 */
export const containerTagSchema = z
  .string()
  .min(1, 'Container tag cannot be empty')
  .max(MAX_CONTAINER_TAG_LENGTH, `Container tag must be at most ${MAX_CONTAINER_TAG_LENGTH} characters`)
  .regex(/^[a-zA-Z0-9_-]+$/, 'Container tag can only contain alphanumeric characters, underscores, and hyphens')
83
+
84
/**
 * Pagination options.
 * - limit: integer from 1 to 1000, defaults to 100
 * - offset: integer >= 0, defaults to 0
 */
export const paginationSchema = z.object({
  limit: z.number().int().min(1).max(1000).default(100),
  offset: z.number().int().min(0).default(0),
})

/**
 * Date range with optional `from` / `to` bounds.
 * When both bounds are present, `from` must not be later than `to`.
 */
export const dateRangeSchema = z
  .object({
    from: z.date().optional(),
    to: z.date().optional(),
  })
  .refine(({ from, to }) => (from && to ? from <= to : true), {
    message: 'Start date must be before or equal to end date',
  })
109
+
110
+ // ============================================================================
111
+ // Memory Schemas
112
+ // ============================================================================
113
+
114
/**
 * Allowed memory types.
 */
export const memoryTypeSchema = z.enum([
  'fact',
  'event',
  'preference',
  'skill',
  'relationship',
  'context',
  'note',
])

/**
 * Allowed relationship types between memories.
 */
export const relationshipTypeSchema = z.enum([
  'updates',
  'extends',
  'derives',
  'contradicts',
  'related',
  'supersedes',
])

/**
 * Input accepted when creating a memory.
 * Only `content` is required; type, container tag and metadata are optional.
 */
export const createMemoryInputSchema = z.object({
  content: nonEmptyString.describe('Memory content'),
  type: memoryTypeSchema.optional().describe('Memory type'),
  containerTag: containerTagSchema.optional().describe('Container tag'),
  metadata: z.record(z.unknown()).optional().describe('Additional metadata'),
})

/**
 * Options for querying memories.
 * `limit` (positive integer, at most 1000) defaults to 100 and `offset`
 * (non-negative integer) defaults to 0; all other fields are optional filters
 * or sort controls.
 */
export const memoryQueryOptionsSchema = z.object({
  containerTag: containerTagSchema.optional(),
  type: memoryTypeSchema.optional(),
  latestOnly: z.boolean().optional(),
  minConfidence: confidenceScore.optional(),
  limit: positiveInt.max(1000).optional().default(100),
  offset: nonNegativeInt.optional().default(0),
  sortBy: z.enum(['createdAt', 'updatedAt', 'confidence']).optional(),
  sortOrder: z.enum(['asc', 'desc']).optional(),
})
147
+
148
+ // ============================================================================
149
+ // Profile Schemas
150
+ // ============================================================================
151
+
152
/**
 * Allowed profile fact types.
 */
export const factTypeSchema = z.enum(['static', 'dynamic'])

/**
 * Allowed profile fact categories.
 */
export const factCategorySchema = z.enum([
  'identity',
  'preference',
  'skill',
  'background',
  'relationship',
  'project',
  'goal',
  'context',
  'other',
])

/**
 * Input accepted for a profile fact.
 * Only `content` is required; the remaining fields are optional.
 */
export const profileFactInputSchema = z.object({
  content: nonEmptyString.describe('Fact content'),
  type: factTypeSchema.optional().describe('Fact type'),
  category: factCategorySchema.optional().describe('Fact category'),
  confidence: confidenceScore.optional().describe('Confidence score'),
  sourceId: z.string().optional().describe('Source identifier'),
})
182
+
183
+ // ============================================================================
184
+ // Search Schemas
185
+ // ============================================================================
186
+
187
/**
 * Allowed search modes.
 */
export const searchModeSchema = z.enum(['vector', 'memory', 'fulltext', 'hybrid'])

/**
 * Comparison operators available to metadata filters.
 */
export const filterOperatorSchema = z.enum([
  'eq',
  'ne',
  'gt',
  'gte',
  'lt',
  'lte',
  'contains',
  'startsWith',
])

/**
 * A single metadata filter: key, primitive value, and operator.
 * The operator defaults to 'eq' when omitted.
 */
export const metadataFilterSchema = z.object({
  key: nonEmptyString.describe('Metadata key to filter'),
  value: z.union([z.string(), z.number(), z.boolean()]).describe('Filter value'),
  operator: filterOperatorSchema.optional().default('eq').describe('Comparison operator'),
})

/**
 * Search options.
 * Defaults: searchMode 'hybrid', limit 10 (at most 100), threshold 0.7,
 * rerank true, rewriteQuery true, includeEmbeddings false.
 */
export const searchOptionsSchema = z.object({
  query: nonEmptyString.describe('Search query'),
  containerTag: containerTagSchema.optional(),
  searchMode: searchModeSchema.optional().default('hybrid'),
  limit: positiveInt.max(100).optional().default(10),
  threshold: confidenceScore.optional().default(0.7),
  rerank: z.boolean().optional().default(true),
  rewriteQuery: z.boolean().optional().default(true),
  filters: z.array(metadataFilterSchema).optional(),
  dateRange: dateRangeSchema.optional(),
  includeEmbeddings: z.boolean().optional().default(false),
})
221
+
222
+ // ============================================================================
223
+ // Extraction Schemas
224
+ // ============================================================================
225
+
226
/**
 * Allowed document content types.
 */
export const contentTypeSchema = z.enum([
  'text',
  'url',
  'markdown',
  'html',
  'json',
  'pdf',
  'image',
  'unknown',
])

/**
 * Allowed chunking strategies.
 */
export const chunkingStrategySchema = z.enum([
  'sentence',
  'paragraph',
  'fixed',
  'semantic',
  'sliding_window',
])

/**
 * Input accepted for a document.
 * Only `content` is required; the remaining fields are optional.
 */
export const documentInputSchema = z.object({
  content: nonEmptyString.describe('Document content'),
  containerTag: containerTagSchema.optional(),
  contentType: contentTypeSchema.optional(),
  metadata: z.record(z.unknown()).optional(),
  chunkingStrategy: chunkingStrategySchema.optional(),
})
246
+
247
+ // ============================================================================
248
+ // Validation Functions
249
+ // ============================================================================
250
+
251
/**
 * Parse `input` with `schema` and return the parsed value.
 *
 * @param schema - Zod schema to parse with
 * @param input - Untrusted value to check
 * @returns The value produced by `schema.parse`
 * @throws ValidationError when parsing fails with a ZodError; any other error
 *   raised during parsing is rethrown unchanged
 */
export function validate<T>(schema: ZodSchema<T>, input: unknown): T {
  try {
    return schema.parse(input)
  } catch (err) {
    // Only Zod failures are translated; everything else propagates as-is.
    throw err instanceof ZodError ? ValidationError.fromZodError(err) : err
  }
}
265
+
266
/**
 * Non-throwing variant of schema validation.
 *
 * @param schema - Zod schema to parse with
 * @param input - Untrusted value to check
 * @returns `{ success: true, data }` on success, otherwise
 *   `{ success: false, error }` where `error` is a ValidationError. Non-Zod
 *   errors are wrapped using their message, or 'Unknown validation error'
 *   when the thrown value is not an Error.
 */
export function validateSafe<T>(
  schema: ZodSchema<T>,
  input: unknown
): { success: true; data: T } | { success: false; error: ValidationError } {
  let parsed: T
  try {
    parsed = schema.parse(input)
  } catch (err) {
    const error =
      err instanceof ZodError
        ? ValidationError.fromZodError(err)
        : new ValidationError(err instanceof Error ? err.message : 'Unknown validation error')
    return { success: false, error }
  }
  return { success: true, data: parsed }
}
286
+
287
/**
 * Validate input and return the parsed result.
 *
 * This simply delegates to `validate`; any defaults or coercions come from
 * the schema itself.
 *
 * @throws ValidationError if validation fails
 */
export function validateWithDefaults<T>(schema: ZodSchema<T>, input: unknown): T {
  const parsed = validate(schema, input)
  return parsed
}
293
+
294
/**
 * Bind a schema into a reusable validator function.
 *
 * @param schema - Zod schema to validate against
 * @returns A function that runs `validate(schema, input)` on each call and
 *   therefore throws ValidationError on invalid input
 */
export function createValidator<T>(schema: ZodSchema<T>): (input: unknown) => T {
  const validator = (input: unknown): T => validate(schema, input)
  return validator
}
300
+
301
/**
 * Assertion guard: narrows `value` to `T` by rejecting null and undefined.
 *
 * @param value - Value to check
 * @param name - Field name used in the error message and as the error key
 * @throws ValidationError when `value` is null or undefined
 */
export function assertDefined<T>(value: T | null | undefined, name: string): asserts value is T {
  // `== null` matches exactly null and undefined.
  if (value == null) {
    throw new ValidationError(`${name} is required`, { [name]: ['Value is required'] })
  }
}
309
+
310
/**
 * Assertion guard: narrows `value` to `string` and rejects blank strings.
 *
 * @param value - String to check
 * @param name - Field name used in the error message and as the error key
 * @throws ValidationError when `value` is null, undefined, or contains only
 *   whitespace
 */
export function assertNonEmpty(value: string | null | undefined, name: string): asserts value is string {
  assertDefined(value, name)
  const isBlank = value.trim() === ''
  if (isBlank) {
    throw new ValidationError(`${name} cannot be empty`, { [name]: ['Value cannot be empty'] })
  }
}
319
+
320
+ // ============================================================================
321
+ // Custom Validators
322
+ // ============================================================================
323
+
324
/**
 * Check memory content for presence and length.
 *
 * @param content - Memory text to check
 * @throws ValidationError when the content is empty or whitespace-only, or
 *   when its string length exceeds 100000
 */
export function validateMemoryContent(content: string): void {
  const maxLength = 100000
  const isBlank = !content || content.trim().length === 0

  if (isBlank) {
    throw new ValidationError('Memory content cannot be empty', {
      content: ['Content is required and cannot be empty'],
    })
  }

  if (content.length > maxLength) {
    throw new ValidationError('Memory content exceeds maximum length', {
      content: ['Content must be less than 100,000 characters'],
    })
  }
}
339
+
340
/**
 * Check a search query for presence and length.
 *
 * @param query - Query text to check
 * @throws ValidationError when the query is empty or whitespace-only, or when
 *   its string length exceeds 10000
 */
export function validateSearchQuery(query: string): void {
  const maxLength = 10000
  const isBlank = !query || query.trim().length === 0

  if (isBlank) {
    throw new ValidationError('Search query cannot be empty', {
      query: ['Query is required and cannot be empty'],
    })
  }

  if (query.length > maxLength) {
    throw new ValidationError('Search query exceeds maximum length', {
      query: ['Query must be less than 10,000 characters'],
    })
  }
}
355
+
356
/**
 * Check an optional container tag against containerTagSchema.
 *
 * @param containerTag - Tag to check; `undefined` is accepted without checks
 * @throws ValidationError when a tag is supplied and fails the schema
 */
export function validateContainerTag(containerTag: string | undefined): void {
  if (containerTag === undefined) return
  validate(containerTagSchema, containerTag)
}
364
+
365
+ // ============================================================================
366
+ // Security Schemas
367
+ // ============================================================================
368
+
369
/**
 * Non-empty content string capped at MAX_CONTENT_SIZE.
 * Intended for user-provided content fields.
 *
 * Note: the cap is applied through the string `.max()` check, i.e. against
 * the string's length rather than its encoded byte size. Use
 * validateContentSize when a byte-accurate limit is needed.
 */
export const boundedContentSchema = z
  .string()
  .min(1, 'Content cannot be empty')
  .max(MAX_CONTENT_SIZE, `Content must be at most ${MAX_CONTENT_SIZE} characters (50KB)`)

/**
 * Non-empty query string capped at MAX_QUERY_LENGTH (string length).
 * Intended for search queries.
 */
export const boundedQuerySchema = z
  .string()
  .min(1, 'Query cannot be empty')
  .max(MAX_QUERY_LENGTH, `Query must be at most ${MAX_QUERY_LENGTH} characters`)
386
+
387
/**
 * Path schema guarding against path traversal.
 *
 * Requires 1 to 1024 characters and delegates the safety decision to
 * `isPathSafe` from the sanitization module. Per the original notes this is
 * meant to reject parent-directory references (".."), absolute paths,
 * URL-encoded traversal sequences, and null bytes / control characters —
 * the exact rules live in `isPathSafe`.
 */
export const safePathSchema = z
  .string()
  .min(1, 'Path cannot be empty')
  .max(1024, 'Path must be at most 1024 characters')
  .refine(
    (candidate) => isPathSafe(candidate),
    'Path contains invalid characters or traversal sequences (e.g., "..", absolute paths)'
  )
403
+
404
/**
 * URL schema restricted to the protocols in ALLOWED_URL_PROTOCOLS
 * (http and https), so schemes such as javascript: or data: are rejected.
 * A value that passes is then run through `sanitizeUrl`.
 */
export const safeUrlSchema = z
  .string()
  .url('Invalid URL format')
  .refine(
    (candidate) => {
      let protocol: string
      try {
        protocol = new URL(candidate).protocol
      } catch {
        // Unparseable input is treated as failing the protocol check.
        return false
      }
      return ALLOWED_URL_PROTOCOLS.includes(protocol)
    },
    { message: 'URL must use http or https protocol' }
  )
  .transform((url) => sanitizeUrl(url))

/**
 * Same as safeUrlSchema, but `undefined` is also accepted.
 */
export const optionalSafeUrlSchema = safeUrlSchema.optional()
429
+
430
/**
 * String schema whose parsed output is the input passed through
 * `sanitizeHtml`, intended to strip XSS vectors during parsing.
 */
export const sanitizedStringSchema = z.string().transform((raw) => sanitizeHtml(raw))

/**
 * String schema whose parsed output is the input passed through
 * `sanitizeForStorage`, the stricter sanitizer intended to leave plain text
 * only.
 */
export const sanitizedStorageStringSchema = z.string().transform((raw) => sanitizeForStorage(raw))
441
+
442
/**
 * Optional metadata record with a serialized-size limit.
 *
 * When metadata is present, its JSON serialization is UTF-8 encoded and the
 * byte count must not exceed MAX_METADATA_SIZE. Metadata that cannot be
 * serialized (JSON.stringify throws) fails validation.
 */
export const boundedMetadataSchema = z
  .record(z.unknown())
  .optional()
  .refine(
    (metadata) => {
      if (!metadata) return true

      let byteLength: number
      try {
        byteLength = new TextEncoder().encode(JSON.stringify(metadata)).length
      } catch {
        return false
      }
      return byteLength <= MAX_METADATA_SIZE
    },
    { message: `Metadata must be at most ${MAX_METADATA_SIZE} bytes (10KB)` }
  )
461
+
462
/**
 * Identifier schema: 1 to 255 characters, limited to ASCII letters, digits,
 * underscores and hyphens.
 */
export const safeIdSchema = z
  .string()
  .min(1, 'ID cannot be empty')
  .max(255, 'ID must be at most 255 characters')
  .regex(/^[a-zA-Z0-9_-]+$/, 'ID can only contain alphanumeric characters, underscores, and hyphens')
470
+
471
/**
 * Email schema with normalization.
 *
 * Surrounding whitespace is trimmed BEFORE the format and length checks run,
 * and the accepted address is lower-cased on output. Previously trimming
 * happened only in the final transform, after `.email()` had already
 * validated the untrimmed input, so the trim could never take effect on
 * padded input.
 */
export const safeEmailSchema = z
  .string()
  .trim()
  .email('Invalid email format')
  .max(255, 'Email must be at most 255 characters')
  .transform((email) => email.toLowerCase())
479
+
480
+ // ============================================================================
481
+ // Composite Security Schemas
482
+ // ============================================================================
483
+
484
/**
 * Memory input schema built from the bounded/security schemas:
 * bounded content, optional type and container tag, size-limited metadata.
 */
export const secureMemoryInputSchema = z.object({
  content: boundedContentSchema.describe('Memory content (max 50KB)'),
  type: memoryTypeSchema.optional().describe('Memory type'),
  containerTag: containerTagSchema.optional().describe('Container tag (max 100 chars)'),
  metadata: boundedMetadataSchema.describe('Additional metadata (max 10KB)'),
})

/**
 * Search query schema built from the bounded/security schemas.
 * Defaults: limit 10 (at most 100), threshold 0.7.
 */
export const secureSearchQuerySchema = z.object({
  query: boundedQuerySchema.describe('Search query (max 10KB)'),
  containerTag: containerTagSchema.optional(),
  limit: positiveInt.max(100).optional().default(10),
  threshold: confidenceScore.optional().default(0.7),
})

/**
 * Document input schema for API submissions: bounded content, optional
 * container tag / content type / source URL (http or https only) / title
 * (at most 500 characters), and size-limited metadata.
 */
export const secureDocumentInputSchema = z.object({
  content: boundedContentSchema.describe('Document content (max 50KB)'),
  containerTag: containerTagSchema.optional().describe('Container tag'),
  contentType: contentTypeSchema.optional().describe('Content type'),
  sourceUrl: optionalSafeUrlSchema.describe('Source URL (http/https only)'),
  title: z.string().max(500, 'Title must be at most 500 characters').optional(),
  metadata: boundedMetadataSchema.describe('Metadata (max 10KB)'),
})
515
+
516
+ // ============================================================================
517
+ // Validation Helpers for Security
518
+ // ============================================================================
519
+
520
/**
 * Enforce a byte-size ceiling on content.
 *
 * @param content - Text to measure
 * @param maxSize - Maximum allowed size in bytes (defaults to MAX_CONTENT_SIZE)
 * @throws ValidationError when the UTF-8 encoded length of `content` is
 *   greater than `maxSize`
 */
export function validateContentSize(content: string, maxSize = MAX_CONTENT_SIZE): void {
  // Measure encoded bytes, not string length, so multi-byte characters count fully.
  const byteLength = new TextEncoder().encode(content).length
  const tooLarge = byteLength > maxSize
  if (tooLarge) {
    throw new ValidationError(`Content size ${byteLength} bytes exceeds maximum of ${maxSize} bytes`, {
      content: [`Content must be at most ${maxSize} bytes`],
    })
  }
}
532
+
533
/**
 * Reject paths that `isPathSafe` does not accept.
 *
 * @param path - Path to check
 * @throws ValidationError when `isPathSafe(path)` is falsy
 */
export function validatePath(path: string): void {
  if (isPathSafe(path)) return

  throw new ValidationError('Path contains invalid characters or traversal sequences', {
    path: ['Path cannot contain "..", absolute paths, or control characters'],
  })
}
544
+
545
/**
 * Check that a URL parses and uses one of ALLOWED_URL_PROTOCOLS.
 *
 * @param url - URL string to check
 * @throws ValidationError with 'Invalid URL format' when the string cannot be
 *   parsed by `URL`, or with a protocol message when the protocol is not
 *   allowed
 */
export function validateUrl(url: string): void {
  let protocol: string
  try {
    protocol = new URL(url).protocol
  } catch {
    throw new ValidationError('Invalid URL format', { url: ['Must be a valid URL'] })
  }

  if (!ALLOWED_URL_PROTOCOLS.includes(protocol)) {
    throw new ValidationError(`URL protocol "${protocol}" is not allowed`, {
      url: ['URL must use http or https protocol'],
    })
  }
}
562
+
563
/**
 * Enforce the MAX_METADATA_SIZE ceiling on serialized metadata.
 *
 * @param metadata - Metadata record to measure; a falsy value is accepted
 *   without checks
 * @throws ValidationError with 'Invalid metadata format' when the metadata
 *   cannot be serialized, or with a size message when the UTF-8 byte length
 *   of its JSON form is greater than MAX_METADATA_SIZE
 */
export function validateMetadataSize(metadata: Record<string, unknown> | undefined): void {
  if (!metadata) return

  let byteLength: number
  try {
    byteLength = new TextEncoder().encode(JSON.stringify(metadata)).length
  } catch {
    // JSON.stringify throws on e.g. circular references or BigInt values.
    throw new ValidationError('Invalid metadata format', { metadata: ['Metadata must be valid JSON'] })
  }

  if (byteLength > MAX_METADATA_SIZE) {
    throw new ValidationError(`Metadata size ${byteLength} bytes exceeds maximum of ${MAX_METADATA_SIZE} bytes`, {
      metadata: [`Metadata must be at most ${MAX_METADATA_SIZE} bytes (10KB)`],
    })
  }
}