npm - @twelvehart/supermemory-runtime - Versions diffs - 1.0.0-next.0 - Mend

@twelvehart/supermemory-runtime 1.0.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (156) hide show

package/.env.example +57 -0
package/README.md +374 -0
package/dist/index.js +189 -0
package/dist/mcp/index.js +1132 -0
package/docker-compose.prod.yml +91 -0
package/docker-compose.yml +358 -0
package/drizzle/0000_dapper_the_professor.sql +159 -0
package/drizzle/0001_api_keys.sql +51 -0
package/drizzle/meta/0000_snapshot.json +1532 -0
package/drizzle/meta/_journal.json +13 -0
package/drizzle.config.ts +20 -0
package/package.json +114 -0
package/scripts/add-extraction-job.ts +122 -0
package/scripts/benchmark-pgvector.ts +122 -0
package/scripts/bootstrap.sh +209 -0
package/scripts/check-runtime-pack.ts +111 -0
package/scripts/claude-mcp-config.ts +336 -0
package/scripts/docker-entrypoint.sh +183 -0
package/scripts/doctor.ts +377 -0
package/scripts/init-db.sql +33 -0
package/scripts/install.sh +1110 -0
package/scripts/mcp-setup.ts +271 -0
package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
package/scripts/migrations/003_create_hnsw_index.sql +94 -0
package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
package/scripts/migrations/005_create_chunks_table.sql +95 -0
package/scripts/migrations/006_create_processing_queue.sql +45 -0
package/scripts/migrations/generate_test_data.sql +42 -0
package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
package/scripts/migrations/run_migrations.sh +286 -0
package/scripts/migrations/test_hnsw_index.sql +255 -0
package/scripts/pre-commit-secrets +282 -0
package/scripts/run-extraction-worker.ts +46 -0
package/scripts/run-phase1-tests.sh +291 -0
package/scripts/setup.ts +222 -0
package/scripts/smoke-install.sh +12 -0
package/scripts/test-health-endpoint.sh +328 -0
package/src/api/index.ts +2 -0
package/src/api/middleware/auth.ts +80 -0
package/src/api/middleware/csrf.ts +308 -0
package/src/api/middleware/errorHandler.ts +166 -0
package/src/api/middleware/rateLimit.ts +360 -0
package/src/api/middleware/validation.ts +514 -0
package/src/api/routes/documents.ts +286 -0
package/src/api/routes/profiles.ts +237 -0
package/src/api/routes/search.ts +71 -0
package/src/api/stores/index.ts +58 -0
package/src/config/bootstrap-env.ts +3 -0
package/src/config/env.ts +71 -0
package/src/config/feature-flags.ts +25 -0
package/src/config/index.ts +140 -0
package/src/config/secrets.config.ts +291 -0
package/src/db/client.ts +92 -0
package/src/db/index.ts +73 -0
package/src/db/postgres.ts +72 -0
package/src/db/schema/chunks.schema.ts +31 -0
package/src/db/schema/containers.schema.ts +46 -0
package/src/db/schema/documents.schema.ts +49 -0
package/src/db/schema/embeddings.schema.ts +32 -0
package/src/db/schema/index.ts +11 -0
package/src/db/schema/memories.schema.ts +72 -0
package/src/db/schema/profiles.schema.ts +34 -0
package/src/db/schema/queue.schema.ts +59 -0
package/src/db/schema/relationships.schema.ts +42 -0
package/src/db/schema.ts +223 -0
package/src/db/worker-connection.ts +47 -0
package/src/index.ts +235 -0
package/src/mcp/CLAUDE.md +1 -0
package/src/mcp/index.ts +1380 -0
package/src/mcp/legacyState.ts +22 -0
package/src/mcp/rateLimit.ts +358 -0
package/src/mcp/resources.ts +309 -0
package/src/mcp/results.ts +104 -0
package/src/mcp/tools.ts +401 -0
package/src/queues/config.ts +119 -0
package/src/queues/index.ts +289 -0
package/src/sdk/client.ts +225 -0
package/src/sdk/errors.ts +266 -0
package/src/sdk/http.ts +560 -0
package/src/sdk/index.ts +244 -0
package/src/sdk/resources/base.ts +65 -0
package/src/sdk/resources/connections.ts +204 -0
package/src/sdk/resources/documents.ts +163 -0
package/src/sdk/resources/index.ts +10 -0
package/src/sdk/resources/memories.ts +150 -0
package/src/sdk/resources/search.ts +60 -0
package/src/sdk/resources/settings.ts +36 -0
package/src/sdk/types.ts +674 -0
package/src/services/chunking/index.ts +451 -0
package/src/services/chunking.service.ts +650 -0
package/src/services/csrf.service.ts +252 -0
package/src/services/documents.repository.ts +219 -0
package/src/services/documents.service.ts +191 -0
package/src/services/embedding.service.ts +404 -0
package/src/services/extraction.service.ts +300 -0
package/src/services/extractors/code.extractor.ts +451 -0
package/src/services/extractors/index.ts +9 -0
package/src/services/extractors/markdown.extractor.ts +461 -0
package/src/services/extractors/pdf.extractor.ts +315 -0
package/src/services/extractors/text.extractor.ts +118 -0
package/src/services/extractors/url.extractor.ts +243 -0
package/src/services/index.ts +235 -0
package/src/services/ingestion.service.ts +177 -0
package/src/services/llm/anthropic.ts +400 -0
package/src/services/llm/base.ts +460 -0
package/src/services/llm/contradiction-detector.service.ts +526 -0
package/src/services/llm/heuristics.ts +148 -0
package/src/services/llm/index.ts +309 -0
package/src/services/llm/memory-classifier.service.ts +383 -0
package/src/services/llm/memory-extension-detector.service.ts +523 -0
package/src/services/llm/mock.ts +470 -0
package/src/services/llm/openai.ts +398 -0
package/src/services/llm/prompts.ts +438 -0
package/src/services/llm/types.ts +373 -0
package/src/services/memory.repository.ts +1769 -0
package/src/services/memory.service.ts +1338 -0
package/src/services/memory.types.ts +234 -0
package/src/services/persistence/index.ts +295 -0
package/src/services/pipeline.service.ts +509 -0
package/src/services/profile.repository.ts +436 -0
package/src/services/profile.service.ts +560 -0
package/src/services/profile.types.ts +270 -0
package/src/services/relationships/detector.ts +1128 -0
package/src/services/relationships/index.ts +268 -0
package/src/services/relationships/memory-integration.ts +459 -0
package/src/services/relationships/strategies.ts +132 -0
package/src/services/relationships/types.ts +370 -0
package/src/services/search.service.ts +761 -0
package/src/services/search.types.ts +220 -0
package/src/services/secrets.service.ts +384 -0
package/src/services/vectorstore/base.ts +327 -0
package/src/services/vectorstore/index.ts +444 -0
package/src/services/vectorstore/memory.ts +286 -0
package/src/services/vectorstore/migration.ts +295 -0
package/src/services/vectorstore/mock.ts +403 -0
package/src/services/vectorstore/pgvector.ts +695 -0
package/src/services/vectorstore/types.ts +247 -0
package/src/startup.ts +389 -0
package/src/types/api.types.ts +193 -0
package/src/types/document.types.ts +103 -0
package/src/types/index.ts +241 -0
package/src/types/profile.base.ts +133 -0
package/src/utils/errors.ts +447 -0
package/src/utils/id.ts +15 -0
package/src/utils/index.ts +101 -0
package/src/utils/logger.ts +313 -0
package/src/utils/sanitization.ts +501 -0
package/src/utils/secret-validation.ts +273 -0
package/src/utils/synonyms.ts +188 -0
package/src/utils/validation.ts +581 -0
package/src/workers/chunking.worker.ts +242 -0
package/src/workers/embedding.worker.ts +358 -0
package/src/workers/extraction.worker.ts +346 -0
package/src/workers/indexing.worker.ts +505 -0
package/tsconfig.json +38 -0

package/src/utils/validation.ts ADDED Viewed

@@ -0,0 +1,581 @@
+/**
+ * Validation Utilities for Supermemory Clone
+ *
+ * Provides Zod schema validation helpers, common validators, and security-focused schemas.
+ *
+ * Security Features:
+ * - Content size validation (50KB default)
+ * - Path traversal prevention
+ * - URL protocol whitelisting (http, https only)
+ * - XSS-safe string schemas with auto-sanitization
+ */
+import { z, ZodError, ZodSchema } from 'zod'
+import { ValidationError } from './errors.js'
+import { sanitizeHtml, sanitizeForStorage, isPathSafe, sanitizeUrl } from './sanitization.js'
+// ============================================================================
+// Security Constants
+// ============================================================================
+/**
+ * Maximum content size in bytes (50KB).
+ * Evaluates to 51,200. It is the default limit of validateContentSize and the
+ * numeric bound passed to boundedContentSchema.
+ */
+export const MAX_CONTENT_SIZE = 50 * 1024
+/**
+ * Maximum query string length (10KB).
+ * Evaluates to 10,240. It is the upper bound passed to boundedQuerySchema.
+ */
+export const MAX_QUERY_LENGTH = 10 * 1024
+/**
+ * Maximum metadata JSON size (10KB).
+ * Evaluates to 10,240. boundedMetadataSchema and validateMetadataSize compare it
+ * against the UTF-8 byte length of the JSON-serialized metadata.
+ */
+export const MAX_METADATA_SIZE = 10 * 1024
+/**
+ * Maximum container tag length.
+ */
+export const MAX_CONTAINER_TAG_LENGTH = 100
+/**
+ * Allowed URL protocols.
+ * Entries include the trailing colon because they are compared against the
+ * `protocol` property of a parsed `URL` (see safeUrlSchema and validateUrl).
+ */
+export const ALLOWED_URL_PROTOCOLS = ['http:', 'https:']
+// ============================================================================
+// Common Schemas
+// ============================================================================
+/**
+ * Non-empty string schema.
+ * Requires a string of length 1 or more. No trimming is applied, so a
+ * whitespace-only string satisfies this schema.
+ */
+export const nonEmptyString = z.string().min(1, 'Value cannot be empty')
+/**
+ * UUID schema.
+ * Reports 'Invalid UUID format' when the string is not a UUID.
+ */
+export const uuidSchema = z.string().uuid('Invalid UUID format')
+/**
+ * Positive integer schema (whole numbers greater than zero).
+ */
+export const positiveInt = z.number().int().positive()
+/**
+ * Non-negative integer schema (whole numbers, zero included).
+ */
+export const nonNegativeInt = z.number().int().nonnegative()
+/**
+ * Confidence score schema (0-1).
+ * Both bounds are inclusive; non-integer values are allowed.
+ */
+export const confidenceScore = z.number().min(0, 'Confidence must be at least 0').max(1, 'Confidence must be at most 1')
+/**
+ * Container tag schema.
+ *
+ * Accepts a string of 1 to MAX_CONTAINER_TAG_LENGTH characters made up only of
+ * ASCII letters, digits, underscores and hyphens. The upper bound and its
+ * message are derived from MAX_CONTAINER_TAG_LENGTH so the exported constant
+ * and the schema cannot drift apart.
+ */
+export const containerTagSchema = z
+  .string()
+  .min(1, 'Container tag cannot be empty')
+  .max(MAX_CONTAINER_TAG_LENGTH, `Container tag must be at most ${MAX_CONTAINER_TAG_LENGTH} characters`)
+  .regex(/^[a-zA-Z0-9_-]+$/, 'Container tag can only contain alphanumeric characters, underscores, and hyphens')
+/**
+ * Pagination options schema.
+ * - limit: integer from 1 to 1000, defaults to 100 when omitted
+ * - offset: integer of 0 or more, defaults to 0 when omitted
+ */
+export const paginationSchema = z.object({
+  limit: z.number().int().min(1).max(1000).default(100),
+  offset: z.number().int().min(0).default(0),
+})
+/**
+ * Date range schema.
+ *
+ * Both `from` and `to` are optional Date values. The ordering rule is only
+ * enforced when both ends are supplied: `from` must not be later than `to`.
+ * An open-ended or empty range is always accepted.
+ */
+export const dateRangeSchema = z
+  .object({
+    from: z.date().optional(),
+    to: z.date().optional(),
+  })
+  .refine(({ from, to }) => from === undefined || to === undefined || from <= to, {
+    message: 'Start date must be before or equal to end date',
+  })
+// ============================================================================
+// Memory Schemas
+// ============================================================================
+/**
+ * Memory type enum schema.
+ * Accepts exactly one of the listed string literals.
+ */
+export const memoryTypeSchema = z.enum(['fact', 'event', 'preference', 'skill', 'relationship', 'context', 'note'])
+/**
+ * Relationship type enum schema.
+ * Accepts exactly one of the listed string literals.
+ */
+export const relationshipTypeSchema = z.enum(['updates', 'extends', 'derives', 'contradicts', 'related', 'supersedes'])
+/**
+ * Memory creation input schema.
+ * Only `content` is required, and it is checked for non-emptiness only. No size
+ * limit is applied to `content` or `metadata` here; secureMemoryInputSchema is
+ * the size-bounded variant.
+ */
+export const createMemoryInputSchema = z.object({
+  content: nonEmptyString.describe('Memory content'),
+  type: memoryTypeSchema.optional().describe('Memory type'),
+  containerTag: containerTagSchema.optional().describe('Container tag'),
+  metadata: z.record(z.unknown()).optional().describe('Additional metadata'),
+})
+/**
+ * Memory query options schema.
+ * Every field is optional. `limit` is a positive integer of at most 1000 and
+ * defaults to 100; `offset` is a non-negative integer and defaults to 0. The
+ * remaining fields have no default and stay undefined when omitted.
+ */
+export const memoryQueryOptionsSchema = z.object({
+  containerTag: containerTagSchema.optional(),
+  type: memoryTypeSchema.optional(),
+  latestOnly: z.boolean().optional(),
+  minConfidence: confidenceScore.optional(),
+  limit: positiveInt.max(1000).optional().default(100),
+  offset: nonNegativeInt.optional().default(0),
+  sortBy: z.enum(['createdAt', 'updatedAt', 'confidence']).optional(),
+  sortOrder: z.enum(['asc', 'desc']).optional(),
+})
+// ============================================================================
+// Profile Schemas
+// ============================================================================
+/**
+ * Fact type enum schema.
+ * Accepts 'static' or 'dynamic'.
+ */
+export const factTypeSchema = z.enum(['static', 'dynamic'])
+/**
+ * Fact category enum schema.
+ * Accepts exactly one of the listed string literals; 'other' is part of the
+ * enum and is not applied automatically as a fallback.
+ */
+export const factCategorySchema = z.enum([
+  'identity',
+  'preference',
+  'skill',
+  'background',
+  'relationship',
+  'project',
+  'goal',
+  'context',
+  'other',
+])
+/**
+ * Profile fact input schema.
+ * Only `content` is required (non-empty string). `type`, `category`,
+ * `confidence` (0-1) and `sourceId` are optional and have no defaults.
+ */
+export const profileFactInputSchema = z.object({
+  content: nonEmptyString.describe('Fact content'),
+  type: factTypeSchema.optional().describe('Fact type'),
+  category: factCategorySchema.optional().describe('Fact category'),
+  confidence: confidenceScore.optional().describe('Confidence score'),
+  sourceId: z.string().optional().describe('Source identifier'),
+})
+// ============================================================================
+// Search Schemas
+// ============================================================================
+/**
+ * Search mode enum schema.
+ * Accepts 'vector', 'memory', 'fulltext' or 'hybrid'.
+ */
+export const searchModeSchema = z.enum(['vector', 'memory', 'fulltext', 'hybrid'])
+/**
+ * Metadata filter operator schema.
+ * Accepts exactly one of the listed comparison operator names.
+ */
+export const filterOperatorSchema = z.enum(['eq', 'ne', 'gt', 'gte', 'lt', 'lte', 'contains', 'startsWith'])
+/**
+ * Metadata filter schema.
+ * `key` must be a non-empty string, `value` a string, number or boolean, and
+ * `operator` falls back to 'eq' when omitted.
+ */
+export const metadataFilterSchema = z.object({
+  key: nonEmptyString.describe('Metadata key to filter'),
+  value: z.union([z.string(), z.number(), z.boolean()]).describe('Filter value'),
+  operator: filterOperatorSchema.optional().default('eq').describe('Comparison operator'),
+})
+/**
+ * Search options schema.
+ * Only `query` is required (non-empty; no upper length bound is applied here).
+ * Defaults applied when a field is omitted: searchMode 'hybrid', limit 10
+ * (positive integer, at most 100), threshold 0.7, rerank true, rewriteQuery
+ * true, includeEmbeddings false.
+ */
+export const searchOptionsSchema = z.object({
+  query: nonEmptyString.describe('Search query'),
+  containerTag: containerTagSchema.optional(),
+  searchMode: searchModeSchema.optional().default('hybrid'),
+  limit: positiveInt.max(100).optional().default(10),
+  threshold: confidenceScore.optional().default(0.7),
+  rerank: z.boolean().optional().default(true),
+  rewriteQuery: z.boolean().optional().default(true),
+  filters: z.array(metadataFilterSchema).optional(),
+  dateRange: dateRangeSchema.optional(),
+  includeEmbeddings: z.boolean().optional().default(false),
+})
+// ============================================================================
+// Extraction Schemas
+// ============================================================================
+/**
+ * Content type enum schema.
+ * Accepts exactly one of the listed string literals.
+ */
+export const contentTypeSchema = z.enum(['text', 'url', 'markdown', 'html', 'json', 'pdf', 'image', 'unknown'])
+/**
+ * Chunking strategy enum schema.
+ * Accepts exactly one of the listed string literals.
+ */
+export const chunkingStrategySchema = z.enum(['sentence', 'paragraph', 'fixed', 'semantic', 'sliding_window'])
+/**
+ * Document input schema.
+ * Only `content` is required, and it is checked for non-emptiness only; no
+ * size limit is applied to `content` or `metadata` here. All other fields are
+ * optional and have no defaults.
+ */
+export const documentInputSchema = z.object({
+  content: nonEmptyString.describe('Document content'),
+  containerTag: containerTagSchema.optional(),
+  contentType: contentTypeSchema.optional(),
+  metadata: z.record(z.unknown()).optional(),
+  chunkingStrategy: chunkingStrategySchema.optional(),
+})
+// ============================================================================
+// Validation Functions
+// ============================================================================
+/**
+ * Parse `input` with `schema` and hand back the parsed value.
+ *
+ * @param schema - Zod schema used for parsing
+ * @param input - Value to check
+ * @returns Whatever `schema.parse` produces for `input`
+ * @throws ValidationError when parsing fails with a ZodError; any other error
+ *   raised while parsing is rethrown unchanged
+ */
+export function validate<T>(schema: ZodSchema<T>, input: unknown): T {
+  try {
+    return schema.parse(input)
+  } catch (cause) {
+    // Only Zod failures are translated; anything else keeps its identity.
+    throw cause instanceof ZodError ? ValidationError.fromZodError(cause) : cause
+  }
+}
+/**
+ * Non-throwing counterpart of `validate`.
+ *
+ * @param schema - Zod schema used for parsing
+ * @param input - Value to check
+ * @returns `{ success: true, data }` when parsing succeeds, otherwise
+ *   `{ success: false, error }` where `error` is always a ValidationError:
+ *   converted from the ZodError, built from another Error's message, or
+ *   carrying 'Unknown validation error' when a non-Error value was thrown
+ */
+export function validateSafe<T>(
+  schema: ZodSchema<T>,
+  input: unknown
+): { success: true; data: T } | { success: false; error: ValidationError } {
+  try {
+    return { success: true, data: schema.parse(input) }
+  } catch (cause) {
+    let failure: ValidationError
+    if (cause instanceof ZodError) {
+      failure = ValidationError.fromZodError(cause)
+    } else if (cause instanceof Error) {
+      failure = new ValidationError(cause.message)
+    } else {
+      failure = new ValidationError('Unknown validation error')
+    }
+    return { success: false, error: failure }
+  }
+}
+/**
+ * Validate input and return the parsed result.
+ *
+ * This is a direct pass-through to `validate`; it adds no behaviour of its
+ * own, so any defaults or coercions come from the schema that is passed in.
+ *
+ * @throws ValidationError under the same conditions as `validate`
+ */
+export function validateWithDefaults<T>(schema: ZodSchema<T>, input: unknown): T {
+  const parsed: T = validate(schema, input)
+  return parsed
+}
+/**
+ * Bind a schema to `validate`.
+ *
+ * @param schema - Zod schema the returned function will parse with
+ * @returns A function that runs `validate(schema, input)` on each call and
+ *   therefore throws ValidationError under the same conditions
+ */
+export function createValidator<T>(schema: ZodSchema<T>): (input: unknown) => T {
+  return (candidate: unknown): T => {
+    return validate(schema, candidate)
+  }
+}
+/**
+ * Assertion guard: narrows `value` to `T` by rejecting null and undefined.
+ *
+ * @param value - Value to check
+ * @param name - Field name used in the error message and as the error key
+ * @throws ValidationError (`"<name> is required"`) when `value` is null or undefined
+ */
+export function assertDefined<T>(value: T | null | undefined, name: string): asserts value is T {
+  const isMissing = value === undefined || value === null
+  if (!isMissing) {
+    return
+  }
+  throw new ValidationError(`${name} is required`, { [name]: ['Value is required'] })
+}
+/**
+ * Assertion guard: narrows `value` to `string` and rejects blank strings.
+ *
+ * @param value - String to check
+ * @param name - Field name used in the error message and as the error key
+ * @throws ValidationError when `value` is null/undefined (via assertDefined)
+ *   or when it is empty after trimming whitespace
+ */
+export function assertNonEmpty(value: string | null | undefined, name: string): asserts value is string {
+  assertDefined(value, name)
+  const isBlank = value.trim() === ''
+  if (isBlank) {
+    throw new ValidationError(`${name} cannot be empty`, { [name]: ['Value cannot be empty'] })
+  }
+}
+// ============================================================================
+// Custom Validators
+// ============================================================================
+/**
+ * Validate memory content.
+ *
+ * Rejects content that is falsy or whitespace-only, and content whose
+ * `length` is greater than 100000. The limit is a literal in this function
+ * and is independent of MAX_CONTENT_SIZE.
+ *
+ * NOTE(review): the error text says "less than 100,000 characters" while the
+ * check allows a length of exactly 100000 — confirm which one is intended.
+ *
+ * @throws ValidationError when the content is blank or too long
+ */
+export function validateMemoryContent(content: string): void {
+  const isBlank = !content || content.trim() === ''
+  if (isBlank) {
+    throw new ValidationError('Memory content cannot be empty', {
+      content: ['Content is required and cannot be empty'],
+    })
+  }
+
+  const isTooLong = content.length > 100000
+  if (isTooLong) {
+    throw new ValidationError('Memory content exceeds maximum length', {
+      content: ['Content must be less than 100,000 characters'],
+    })
+  }
+}
+/**
+ * Validate search query.
+ *
+ * Rejects a query that is falsy or whitespace-only, and a query whose
+ * `length` is greater than 10000. The limit is a literal in this function
+ * and is independent of MAX_QUERY_LENGTH.
+ *
+ * NOTE(review): the error text says "less than 10,000 characters" while the
+ * check allows a length of exactly 10000 — confirm which one is intended.
+ *
+ * @throws ValidationError when the query is blank or too long
+ */
+export function validateSearchQuery(query: string): void {
+  const isBlank = !query || query.trim() === ''
+  if (isBlank) {
+    throw new ValidationError('Search query cannot be empty', {
+      query: ['Query is required and cannot be empty'],
+    })
+  }
+
+  const isTooLong = query.length > 10000
+  if (isTooLong) {
+    throw new ValidationError('Search query exceeds maximum length', {
+      query: ['Query must be less than 10,000 characters'],
+    })
+  }
+}
+/**
+ * Validate container tag.
+ *
+ * `undefined` is treated as "no tag supplied" and passes without any check;
+ * every other value is validated against containerTagSchema.
+ *
+ * @throws ValidationError when a supplied tag does not satisfy containerTagSchema
+ */
+export function validateContainerTag(containerTag: string | undefined): void {
+  if (containerTag === undefined) {
+    return
+  }
+  validate(containerTagSchema, containerTag)
+}
+// ============================================================================
+// Security Schemas
+// ============================================================================
+/**
+ * Content with size limit schema (50KB default).
+ * Use this for user-provided content fields.
+ * Requires at least 1 character and at most MAX_CONTENT_SIZE.
+ *
+ * NOTE(review): the bound is applied with `z.string().max()`, which limits the
+ * string's length rather than its encoded byte size, so multi-byte text can
+ * exceed 50KB of UTF-8 and still pass. validateContentSize measures bytes —
+ * confirm which unit is intended here.
+ */
+export const boundedContentSchema = z
+  .string()
+  .min(1, 'Content cannot be empty')
+  .max(MAX_CONTENT_SIZE, `Content must be at most ${MAX_CONTENT_SIZE} characters (50KB)`)
+/**
+ * Query with size limit schema (10KB).
+ * Use this for search queries.
+ * Requires at least 1 character and at most MAX_QUERY_LENGTH; as with
+ * boundedContentSchema the limit is on string length.
+ */
+export const boundedQuerySchema = z
+  .string()
+  .min(1, 'Query cannot be empty')
+  .max(MAX_QUERY_LENGTH, `Query must be at most ${MAX_QUERY_LENGTH} characters`)
+/**
+ * Safe path schema guarding against path traversal.
+ *
+ * The string must be 1-1024 characters long and must be accepted by
+ * `isPathSafe` (imported from ./sanitization.js), which makes the actual
+ * safety decision. According to this module's documentation that check is
+ * meant to reject paths containing:
+ * - Parent directory references (..)
+ * - Absolute paths (starting with / or drive letters)
+ * - URL-encoded traversal sequences
+ * - Null bytes and control characters
+ */
+export const safePathSchema = z
+  .string()
+  .min(1, 'Path cannot be empty')
+  .max(1024, 'Path must be at most 1024 characters')
+  .refine((candidate) => isPathSafe(candidate), {
+    message: 'Path contains invalid characters or traversal sequences (e.g., "..", absolute paths)',
+  })
+/**
+ * Safe URL schema with a protocol whitelist.
+ *
+ * Validation runs in three steps: the string must be a URL, its parsed
+ * protocol must be listed in ALLOWED_URL_PROTOCOLS (http and https), and the
+ * accepted value is then passed through `sanitizeUrl`, whose return value
+ * becomes the schema output. Schemes such as javascript: and data: fail the
+ * protocol step.
+ */
+export const safeUrlSchema = z
+  .string()
+  .url('Invalid URL format')
+  .refine(
+    (candidate) => {
+      let protocol: string
+      try {
+        protocol = new URL(candidate).protocol
+      } catch {
+        // Anything the URL constructor cannot parse is rejected.
+        return false
+      }
+      return ALLOWED_URL_PROTOCOLS.includes(protocol)
+    },
+    { message: 'URL must use http or https protocol' }
+  )
+  .transform((candidate) => sanitizeUrl(candidate))
+/**
+ * Variant of safeUrlSchema that also accepts `undefined`.
+ */
+export const optionalSafeUrlSchema = safeUrlSchema.optional()
+/**
+ * String schema whose output is the input passed through `sanitizeHtml`.
+ * Nothing is rejected beyond non-string input; the value is rewritten during
+ * parsing. Per this module's documentation the intent is to strip XSS
+ * vectors (dangerous HTML/JavaScript).
+ */
+export const sanitizedStringSchema = z
+  .string()
+  .transform((raw) => sanitizeHtml(raw))
+/**
+ * String schema whose output is the input passed through
+ * `sanitizeForStorage`. Per this module's documentation the intent is to
+ * remove all HTML and keep plain text only.
+ */
+export const sanitizedStorageStringSchema = z
+  .string()
+  .transform((raw) => sanitizeForStorage(raw))
+/**
+ * Optional metadata record with a serialized-size cap.
+ *
+ * A missing value passes. Otherwise the record is JSON-serialized and its
+ * UTF-8 byte length must not exceed MAX_METADATA_SIZE (10KB). Metadata that
+ * cannot be serialized is rejected with the same size message.
+ */
+export const boundedMetadataSchema = z
+  .record(z.unknown())
+  .optional()
+  .refine(
+    (metadata) => {
+      if (!metadata) {
+        return true
+      }
+      try {
+        const serialized = JSON.stringify(metadata)
+        const byteLength = new TextEncoder().encode(serialized).length
+        return byteLength <= MAX_METADATA_SIZE
+      } catch {
+        // Serialization failures count as invalid metadata.
+        return false
+      }
+    },
+    { message: `Metadata must be at most ${MAX_METADATA_SIZE} bytes (10KB)` }
+  )
+/**
+ * ID schema - alphanumeric with hyphens, underscores, max 255 chars.
+ * Requires 1 to 255 characters, all of them ASCII letters, digits, underscores
+ * or hyphens; any other character fails the regex check.
+ */
+export const safeIdSchema = z
+  .string()
+  .min(1, 'ID cannot be empty')
+  .max(255, 'ID must be at most 255 characters')
+  .regex(/^[a-zA-Z0-9_-]+$/, 'ID can only contain alphanumeric characters, underscores, and hyphens')
+/**
+ * Email schema with normalization.
+ *
+ * Surrounding whitespace is trimmed BEFORE the format and length checks, so an
+ * address pasted with stray spaces is accepted and normalized. Trimming only
+ * in the final transform could never take effect, because the email check
+ * rejects padded input first. The parsed output is the trimmed address in
+ * lower case, at most 255 characters long.
+ */
+export const safeEmailSchema = z
+  .string()
+  .trim()
+  .email('Invalid email format')
+  .max(255, 'Email must be at most 255 characters')
+  .transform((email) => email.toLowerCase())
+// ============================================================================
+// Composite Security Schemas
+// ============================================================================
+/**
+ * Secure memory content input schema with all security validations.
+ * `content` is required and bounded by boundedContentSchema; `type` and
+ * `containerTag` are optional; `metadata` is optional and capped by
+ * boundedMetadataSchema.
+ */
+export const secureMemoryInputSchema = z.object({
+  content: boundedContentSchema.describe('Memory content (max 50KB)'),
+  type: memoryTypeSchema.optional().describe('Memory type'),
+  containerTag: containerTagSchema.optional().describe('Container tag (max 100 chars)'),
+  metadata: boundedMetadataSchema.describe('Additional metadata (max 10KB)'),
+})
+/**
+ * Secure search query schema with input validation.
+ * `query` is required and bounded by boundedQuerySchema. `limit` is a positive
+ * integer of at most 100 and defaults to 10; `threshold` is a 0-1 score and
+ * defaults to 0.7.
+ */
+export const secureSearchQuerySchema = z.object({
+  query: boundedQuerySchema.describe('Search query (max 10KB)'),
+  containerTag: containerTagSchema.optional(),
+  limit: positiveInt.max(100).optional().default(10),
+  threshold: confidenceScore.optional().default(0.7),
+})
+/**
+ * Secure document input schema for API submissions.
+ * `content` is required and bounded by boundedContentSchema. `sourceUrl` is
+ * optional and, when present, must pass safeUrlSchema (http/https only).
+ * `title` is optional and limited to 500 characters; `metadata` is optional
+ * and capped by boundedMetadataSchema.
+ */
+export const secureDocumentInputSchema = z.object({
+  content: boundedContentSchema.describe('Document content (max 50KB)'),
+  containerTag: containerTagSchema.optional().describe('Container tag'),
+  contentType: contentTypeSchema.optional().describe('Content type'),
+  sourceUrl: optionalSafeUrlSchema.describe('Source URL (http/https only)'),
+  title: z.string().max(500, 'Title must be at most 500 characters').optional(),
+  metadata: boundedMetadataSchema.describe('Metadata (max 10KB)'),
+})
+// ============================================================================
+// Validation Helpers for Security
+// ============================================================================
+/**
+ * Checks the UTF-8 encoded size of `content` against a byte limit.
+ *
+ * @param content - Text to measure
+ * @param maxSize - Limit in bytes; defaults to MAX_CONTENT_SIZE
+ * @throws ValidationError if the encoded size is greater than `maxSize`
+ */
+export function validateContentSize(content: string, maxSize = MAX_CONTENT_SIZE): void {
+  const byteLength = new TextEncoder().encode(content).length
+  const isOversized = byteLength > maxSize
+  if (!isOversized) {
+    return
+  }
+  throw new ValidationError(`Content size ${byteLength} bytes exceeds maximum of ${maxSize} bytes`, {
+    content: [`Content must be at most ${maxSize} bytes`],
+  })
+}
+/**
+ * Throwing wrapper around `isPathSafe`.
+ *
+ * @param path - Path to check
+ * @throws ValidationError if `isPathSafe(path)` returns a falsy result
+ */
+export function validatePath(path: string): void {
+  if (isPathSafe(path)) {
+    return
+  }
+  throw new ValidationError('Path contains invalid characters or traversal sequences', {
+    path: ['Path cannot contain "..", absolute paths, or control characters'],
+  })
+}
+/**
+ * Validates that a string parses as a URL and uses an allowed protocol.
+ *
+ * @param url - URL string to check
+ * @throws ValidationError 'Invalid URL format' when the string cannot be
+ *   parsed, or 'URL protocol "<protocol>" is not allowed' when the parsed
+ *   protocol is not in ALLOWED_URL_PROTOCOLS
+ */
+export function validateUrl(url: string): void {
+  let protocol: string
+  try {
+    protocol = new URL(url).protocol
+  } catch {
+    throw new ValidationError('Invalid URL format', { url: ['Must be a valid URL'] })
+  }
+
+  if (!ALLOWED_URL_PROTOCOLS.includes(protocol)) {
+    throw new ValidationError(`URL protocol "${protocol}" is not allowed`, {
+      url: ['URL must use http or https protocol'],
+    })
+  }
+}
+/**
+ * Validates that JSON-serialized metadata fits within MAX_METADATA_SIZE.
+ *
+ * A falsy `metadata` value is accepted without any check. Otherwise the
+ * record is serialized with JSON.stringify and measured in UTF-8 bytes.
+ *
+ * @param metadata - Metadata record to measure, or undefined
+ * @throws ValidationError 'Invalid metadata format' when serialization fails,
+ *   or a size error when the byte length is greater than MAX_METADATA_SIZE
+ */
+export function validateMetadataSize(metadata: Record<string, unknown> | undefined): void {
+  if (!metadata) {
+    return
+  }
+
+  let byteLength: number
+  try {
+    byteLength = new TextEncoder().encode(JSON.stringify(metadata)).length
+  } catch {
+    throw new ValidationError('Invalid metadata format', { metadata: ['Metadata must be valid JSON'] })
+  }
+
+  if (byteLength > MAX_METADATA_SIZE) {
+    throw new ValidationError(`Metadata size ${byteLength} bytes exceeds maximum of ${MAX_METADATA_SIZE} bytes`, {
+      metadata: [`Metadata must be at most ${MAX_METADATA_SIZE} bytes (10KB)`],
+    })
+  }
+}