@twelvehart/supermemory-runtime 1.0.0-next.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +57 -0
- package/README.md +374 -0
- package/dist/index.js +189 -0
- package/dist/mcp/index.js +1132 -0
- package/docker-compose.prod.yml +91 -0
- package/docker-compose.yml +358 -0
- package/drizzle/0000_dapper_the_professor.sql +159 -0
- package/drizzle/0001_api_keys.sql +51 -0
- package/drizzle/meta/0000_snapshot.json +1532 -0
- package/drizzle/meta/_journal.json +13 -0
- package/drizzle.config.ts +20 -0
- package/package.json +114 -0
- package/scripts/add-extraction-job.ts +122 -0
- package/scripts/benchmark-pgvector.ts +122 -0
- package/scripts/bootstrap.sh +209 -0
- package/scripts/check-runtime-pack.ts +111 -0
- package/scripts/claude-mcp-config.ts +336 -0
- package/scripts/docker-entrypoint.sh +183 -0
- package/scripts/doctor.ts +377 -0
- package/scripts/init-db.sql +33 -0
- package/scripts/install.sh +1110 -0
- package/scripts/mcp-setup.ts +271 -0
- package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
- package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
- package/scripts/migrations/003_create_hnsw_index.sql +94 -0
- package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
- package/scripts/migrations/005_create_chunks_table.sql +95 -0
- package/scripts/migrations/006_create_processing_queue.sql +45 -0
- package/scripts/migrations/generate_test_data.sql +42 -0
- package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
- package/scripts/migrations/run_migrations.sh +286 -0
- package/scripts/migrations/test_hnsw_index.sql +255 -0
- package/scripts/pre-commit-secrets +282 -0
- package/scripts/run-extraction-worker.ts +46 -0
- package/scripts/run-phase1-tests.sh +291 -0
- package/scripts/setup.ts +222 -0
- package/scripts/smoke-install.sh +12 -0
- package/scripts/test-health-endpoint.sh +328 -0
- package/src/api/index.ts +2 -0
- package/src/api/middleware/auth.ts +80 -0
- package/src/api/middleware/csrf.ts +308 -0
- package/src/api/middleware/errorHandler.ts +166 -0
- package/src/api/middleware/rateLimit.ts +360 -0
- package/src/api/middleware/validation.ts +514 -0
- package/src/api/routes/documents.ts +286 -0
- package/src/api/routes/profiles.ts +237 -0
- package/src/api/routes/search.ts +71 -0
- package/src/api/stores/index.ts +58 -0
- package/src/config/bootstrap-env.ts +3 -0
- package/src/config/env.ts +71 -0
- package/src/config/feature-flags.ts +25 -0
- package/src/config/index.ts +140 -0
- package/src/config/secrets.config.ts +291 -0
- package/src/db/client.ts +92 -0
- package/src/db/index.ts +73 -0
- package/src/db/postgres.ts +72 -0
- package/src/db/schema/chunks.schema.ts +31 -0
- package/src/db/schema/containers.schema.ts +46 -0
- package/src/db/schema/documents.schema.ts +49 -0
- package/src/db/schema/embeddings.schema.ts +32 -0
- package/src/db/schema/index.ts +11 -0
- package/src/db/schema/memories.schema.ts +72 -0
- package/src/db/schema/profiles.schema.ts +34 -0
- package/src/db/schema/queue.schema.ts +59 -0
- package/src/db/schema/relationships.schema.ts +42 -0
- package/src/db/schema.ts +223 -0
- package/src/db/worker-connection.ts +47 -0
- package/src/index.ts +235 -0
- package/src/mcp/CLAUDE.md +1 -0
- package/src/mcp/index.ts +1380 -0
- package/src/mcp/legacyState.ts +22 -0
- package/src/mcp/rateLimit.ts +358 -0
- package/src/mcp/resources.ts +309 -0
- package/src/mcp/results.ts +104 -0
- package/src/mcp/tools.ts +401 -0
- package/src/queues/config.ts +119 -0
- package/src/queues/index.ts +289 -0
- package/src/sdk/client.ts +225 -0
- package/src/sdk/errors.ts +266 -0
- package/src/sdk/http.ts +560 -0
- package/src/sdk/index.ts +244 -0
- package/src/sdk/resources/base.ts +65 -0
- package/src/sdk/resources/connections.ts +204 -0
- package/src/sdk/resources/documents.ts +163 -0
- package/src/sdk/resources/index.ts +10 -0
- package/src/sdk/resources/memories.ts +150 -0
- package/src/sdk/resources/search.ts +60 -0
- package/src/sdk/resources/settings.ts +36 -0
- package/src/sdk/types.ts +674 -0
- package/src/services/chunking/index.ts +451 -0
- package/src/services/chunking.service.ts +650 -0
- package/src/services/csrf.service.ts +252 -0
- package/src/services/documents.repository.ts +219 -0
- package/src/services/documents.service.ts +191 -0
- package/src/services/embedding.service.ts +404 -0
- package/src/services/extraction.service.ts +300 -0
- package/src/services/extractors/code.extractor.ts +451 -0
- package/src/services/extractors/index.ts +9 -0
- package/src/services/extractors/markdown.extractor.ts +461 -0
- package/src/services/extractors/pdf.extractor.ts +315 -0
- package/src/services/extractors/text.extractor.ts +118 -0
- package/src/services/extractors/url.extractor.ts +243 -0
- package/src/services/index.ts +235 -0
- package/src/services/ingestion.service.ts +177 -0
- package/src/services/llm/anthropic.ts +400 -0
- package/src/services/llm/base.ts +460 -0
- package/src/services/llm/contradiction-detector.service.ts +526 -0
- package/src/services/llm/heuristics.ts +148 -0
- package/src/services/llm/index.ts +309 -0
- package/src/services/llm/memory-classifier.service.ts +383 -0
- package/src/services/llm/memory-extension-detector.service.ts +523 -0
- package/src/services/llm/mock.ts +470 -0
- package/src/services/llm/openai.ts +398 -0
- package/src/services/llm/prompts.ts +438 -0
- package/src/services/llm/types.ts +373 -0
- package/src/services/memory.repository.ts +1769 -0
- package/src/services/memory.service.ts +1338 -0
- package/src/services/memory.types.ts +234 -0
- package/src/services/persistence/index.ts +295 -0
- package/src/services/pipeline.service.ts +509 -0
- package/src/services/profile.repository.ts +436 -0
- package/src/services/profile.service.ts +560 -0
- package/src/services/profile.types.ts +270 -0
- package/src/services/relationships/detector.ts +1128 -0
- package/src/services/relationships/index.ts +268 -0
- package/src/services/relationships/memory-integration.ts +459 -0
- package/src/services/relationships/strategies.ts +132 -0
- package/src/services/relationships/types.ts +370 -0
- package/src/services/search.service.ts +761 -0
- package/src/services/search.types.ts +220 -0
- package/src/services/secrets.service.ts +384 -0
- package/src/services/vectorstore/base.ts +327 -0
- package/src/services/vectorstore/index.ts +444 -0
- package/src/services/vectorstore/memory.ts +286 -0
- package/src/services/vectorstore/migration.ts +295 -0
- package/src/services/vectorstore/mock.ts +403 -0
- package/src/services/vectorstore/pgvector.ts +695 -0
- package/src/services/vectorstore/types.ts +247 -0
- package/src/startup.ts +389 -0
- package/src/types/api.types.ts +193 -0
- package/src/types/document.types.ts +103 -0
- package/src/types/index.ts +241 -0
- package/src/types/profile.base.ts +133 -0
- package/src/utils/errors.ts +447 -0
- package/src/utils/id.ts +15 -0
- package/src/utils/index.ts +101 -0
- package/src/utils/logger.ts +313 -0
- package/src/utils/sanitization.ts +501 -0
- package/src/utils/secret-validation.ts +273 -0
- package/src/utils/synonyms.ts +188 -0
- package/src/utils/validation.ts +581 -0
- package/src/workers/chunking.worker.ts +242 -0
- package/src/workers/embedding.worker.ts +358 -0
- package/src/workers/extraction.worker.ts +346 -0
- package/src/workers/indexing.worker.ts +505 -0
- package/tsconfig.json +38 -0
|
@@ -0,0 +1,501 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sanitization Utilities for Supermemory Clone
|
|
3
|
+
*
|
|
4
|
+
* Provides XSS sanitization, HTML stripping, and content sanitization
|
|
5
|
+
* for secure storage and display of user-provided content.
|
|
6
|
+
*
|
|
7
|
+
* Uses isomorphic-dompurify for cross-platform (Node.js/browser) XSS prevention.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import DOMPurifyDefault from 'isomorphic-dompurify'
|
|
11
|
+
|
|
12
|
+
// Bind DOMPurify's `sanitize` to its instance once and call the bound function
// directly. Using the function rather than the DOMPurify object avoids type
// conflicts between dompurify versions.
const sanitize = DOMPurifyDefault.sanitize.bind(DOMPurifyDefault)
|
|
14
|
+
|
|
15
|
+
// ============================================================================
|
|
16
|
+
// Configuration
|
|
17
|
+
// ============================================================================
|
|
18
|
+
|
|
19
|
+
/**
 * DOMPurify configuration type (subset of options we use).
 *
 * Only the options this module actually sets are declared. Every field is
 * optional, and a value of this type is handed to `sanitize` unchanged.
 */
interface SanitizeConfig {
  /** Tag names to allow. */
  ALLOWED_TAGS?: string[]
  /** Attribute names to allow. */
  ALLOWED_ATTR?: string[]
  /** Data-attribute switch; both configs in this file set it to `false`. */
  ALLOW_DATA_ATTR?: boolean
  /** Tag names to forbid explicitly. */
  FORBID_TAGS?: string[]
  /** Attribute names to forbid explicitly. */
  FORBID_ATTR?: string[]
}
|
|
29
|
+
|
|
30
|
+
/**
 * Baseline DOMPurify options, used by `sanitizeHtml` when the caller passes
 * no config of their own.
 *
 * Permits everyday formatting markup (text styling, lists, headings, links,
 * generic containers, tables) and explicitly forbids script-capable or
 * interactive elements and inline event-handler attributes.
 */
const DEFAULT_SANITIZE_CONFIG: SanitizeConfig = {
  ALLOWED_TAGS: [
    // Paragraphs and inline text styling
    'p', 'br', 'strong', 'em', 'b', 'i', 'u', 's', 'strike', 'sub', 'sup',
    // Quotations and code
    'blockquote', 'code', 'pre',
    // Lists
    'ul', 'ol', 'li',
    // Headings
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    // Links and generic containers
    'a', 'span', 'div',
    // Tables
    'table', 'thead', 'tbody', 'tr', 'th', 'td',
  ],
  ALLOWED_ATTR: ['href', 'title', 'target', 'rel', 'class', 'id'],
  ALLOW_DATA_ATTR: false,
  FORBID_TAGS: [
    'script',
    'style',
    'iframe',
    'object',
    'embed',
    'form',
    'input',
    'button',
  ],
  FORBID_ATTR: [
    'onerror',
    'onload',
    'onclick',
    'onmouseover',
    'onfocus',
    'onblur',
  ],
}
|
|
74
|
+
|
|
75
|
+
/**
 * Stricter DOMPurify options applied by `sanitizeForStorage`.
 *
 * Keeps only basic text formatting, allows no attributes at all, and
 * additionally forbids links and images together with their `href`/`src`
 * attributes.
 */
const STORAGE_SANITIZE_CONFIG: SanitizeConfig = {
  ALLOWED_TAGS: [
    'p',
    'br',
    'strong',
    'em',
    'b',
    'i',
    'code',
    'pre',
    'ul',
    'ol',
    'li',
  ],
  ALLOWED_ATTR: [],
  ALLOW_DATA_ATTR: false,
  FORBID_TAGS: [
    'script',
    'style',
    'iframe',
    'object',
    'embed',
    'form',
    'input',
    'button',
    'a',
    'img',
  ],
  FORBID_ATTR: [
    'href',
    'src',
    'onerror',
    'onload',
    'onclick',
    'onmouseover',
    'onfocus',
    'onblur',
  ],
}
|
|
86
|
+
|
|
87
|
+
// ============================================================================
|
|
88
|
+
// Core Sanitization Functions
|
|
89
|
+
// ============================================================================
|
|
90
|
+
|
|
91
|
+
/**
 * Cleans HTML for display: XSS vectors are removed, safe formatting is kept.
 *
 * Intended for content that will be rendered in an HTML context. The input is
 * passed through DOMPurify using either the supplied config or, when none is
 * given, `DEFAULT_SANITIZE_CONFIG`.
 *
 * @param content - The HTML content to sanitize
 * @param config - Optional DOMPurify configuration replacing the default one
 * @returns The sanitized HTML, or `''` when `content` is empty or not a string
 *
 * @example
 * ```typescript
 * const unsafe = '<script>alert("xss")</script><p>Hello <b>World</b></p>';
 * const safe = sanitizeHtml(unsafe);
 * // Returns: '<p>Hello <b>World</b></p>'
 * ```
 */
export function sanitizeHtml(content: string, config?: SanitizeConfig): string {
  const hasContent = typeof content === 'string' && content !== ''
  if (!hasContent) {
    return ''
  }

  const effectiveConfig = config ?? DEFAULT_SANITIZE_CONFIG
  return sanitize(content, effectiveConfig)
}
|
|
116
|
+
|
|
117
|
+
/**
 * Cleans content before it is written to the database.
 *
 * Applies `STORAGE_SANITIZE_CONFIG`, which is stricter than the display
 * config: links, images and all attributes are dropped, and only basic text
 * formatting tags are kept.
 *
 * @param content - The content to sanitize for storage
 * @returns The sanitized content, or `''` when `content` is empty or not a string
 *
 * @example
 * ```typescript
 * const input = '<a href="javascript:alert(1)">Click</a><p>Text</p>';
 * const safe = sanitizeForStorage(input);
 * // Returns: 'Click<p>Text</p>'
 * ```
 */
export function sanitizeForStorage(content: string): string {
  return typeof content === 'string' && content.length > 0
    ? sanitize(content, STORAGE_SANITIZE_CONFIG)
    : ''
}
|
|
141
|
+
|
|
142
|
+
/**
 * Reduces HTML to plain text.
 *
 * Handy for search indexes, summaries and text-only views. The content is
 * first run through DOMPurify with an empty tag allow-list, and any HTML
 * entities left in the result are then decoded.
 *
 * @param content - The HTML content to strip
 * @returns Plain text, or `''` when `content` is empty or not a string
 *
 * @example
 * ```typescript
 * const html = '<p>Hello <strong>World</strong>!</p>';
 * const text = stripHtml(html);
 * // Returns: 'Hello World!'
 * ```
 */
export function stripHtml(content: string): string {
  if (typeof content !== 'string' || content === '') {
    return ''
  }

  // Sanitize with no tags allowed, then turn remaining entities back into text.
  const withoutTags = sanitize(content, { ALLOWED_TAGS: [] })
  return decodeHtmlEntities(withoutTags)
}
|
|
169
|
+
|
|
170
|
+
/**
 * Decodes common HTML entities to their text equivalents.
 *
 * Supported forms:
 * - named: `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&nbsp;` (decoded to a plain space)
 * - decimal numeric references, e.g. `&#39;`
 * - hexadecimal numeric references, e.g. `&#x27;`
 *
 * The text is decoded in a single pass, so the output of one replacement is
 * never decoded a second time: `&amp;lt;` becomes the literal text `&lt;`,
 * not `<`. Unknown named entities and numeric references beyond the Unicode
 * range (above U+10FFFF) are left exactly as they were.
 *
 * @param text - Text with HTML entities
 * @returns Decoded text
 */
function decodeHtmlEntities(text: string): string {
  const namedEntities = new Map<string, string>([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['nbsp', ' '],
  ])

  // One alternation covers decimal, hex and named forms; exactly one capture
  // group is defined for any given match.
  const entityPattern = /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-z]+));/g

  return text.replace(
    entityPattern,
    (match: string, decimal?: string, hex?: string, name?: string): string => {
      if (name !== undefined) {
        return namedEntities.get(name) ?? match
      }

      const codePoint =
        decimal !== undefined ? parseInt(decimal, 10) : parseInt(hex ?? '', 16)

      // fromCodePoint handles astral characters (fromCharCode would truncate
      // them) but throws on out-of-range values, so guard before calling it.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match
    }
  )
}
|
|
198
|
+
|
|
199
|
+
// ============================================================================
|
|
200
|
+
// URL Sanitization
|
|
201
|
+
// ============================================================================
|
|
202
|
+
|
|
203
|
+
/**
 * Allowed URL protocols for links and resources.
 */
const ALLOWED_PROTOCOLS = ['http:', 'https:', 'mailto:']

/**
 * Sanitizes a URL by validating the protocol and structure.
 *
 * Absolute URLs are parsed with the `URL` constructor and accepted only when
 * their protocol is listed in `ALLOWED_PROTOCOLS`; accepted URLs are returned
 * in the normalized form produced by `URL#toString()`. This rejects
 * `javascript:`, `data:`, `vbscript:` and any other scheme.
 *
 * Input that does not parse as an absolute URL is accepted only when it is a
 * root-relative path (starts with a single `/`). The following look-alikes are
 * rejected because browsers resolve them to a different origin:
 * - `//host` and `/\host` (a backslash is treated like a slash)
 * - any value containing control characters (tabs and newlines are removed
 *   by browsers before parsing, so `/<TAB>/host` would become `//host`)
 *
 * @param url - The URL to sanitize
 * @returns Sanitized URL or empty string if invalid/dangerous
 *
 * @example
 * ```typescript
 * sanitizeUrl('https://example.com'); // Returns: 'https://example.com/'
 * sanitizeUrl('/docs/page');          // Returns: '/docs/page'
 * sanitizeUrl('javascript:alert(1)'); // Returns: ''
 * sanitizeUrl('data:text/html,...');  // Returns: ''
 * sanitizeUrl('/\\evil.example');     // Returns: ''
 * ```
 */
export function sanitizeUrl(url: string): string {
  if (!url || typeof url !== 'string') {
    return ''
  }

  const trimmed = url.trim()

  // Whitespace-only input
  if (!trimmed) {
    return ''
  }

  let parsed: URL
  try {
    parsed = new URL(trimmed)
  } catch {
    // Not an absolute URL: allow only unambiguous root-relative paths.
    // eslint-disable-next-line no-control-regex
    const hasControlChars = /[\x00-\x1f\x7f]/.test(trimmed)
    const isRootRelative = trimmed.startsWith('/') && !/^\/[/\\]/.test(trimmed)

    return isRootRelative && !hasControlChars ? trimmed : ''
  }

  // Validate protocol, then return the normalized serialization
  return ALLOWED_PROTOCOLS.includes(parsed.protocol) ? parsed.toString() : ''
}
|
|
256
|
+
|
|
257
|
+
/**
 * Reports whether a URL may be used as-is.
 *
 * The empty string counts as safe; any other value is safe only when
 * `sanitizeUrl` returns a non-empty result for it.
 *
 * @param url - The URL to validate
 * @returns True if the URL is considered safe
 */
export function isUrlSafe(url: string): boolean {
  if (url === '') {
    return true
  }

  return sanitizeUrl(url) !== ''
}
|
|
266
|
+
|
|
267
|
+
// ============================================================================
|
|
268
|
+
// Path Sanitization
|
|
269
|
+
// ============================================================================
|
|
270
|
+
|
|
271
|
+
/**
 * Dangerous path patterns that could enable path traversal attacks.
 *
 * These regexes are shared across calls and used with `RegExp#test`, so they
 * must NOT carry the `g` (or `y`) flag: a global regex keeps `lastIndex`
 * between calls, which would make a later check start mid-string and miss a
 * match at the beginning of the next path.
 */
const DANGEROUS_PATH_PATTERNS = [
  /\.\./, // Parent directory traversal
  /^\//, // Absolute paths
  /^[a-zA-Z]:[\\/]/, // Windows absolute paths
  /\0/, // Null bytes
  /%2e%2e/i, // URL-encoded ..
  /%252e%252e/i, // Double URL-encoded ..
  /(?:\.|%2e|%252e){2}/i, // Mixed literal/encoded .. (e.g. ".%2e")
  /%c0%ae/i, // UTF-8 encoded .
  /%c1%9c/i, // UTF-8 encoded /
]

/**
 * Sanitizes a file path to prevent path traversal attacks.
 *
 * The path is rejected (returns `null`) when it contains a parent-directory
 * reference in literal or URL-encoded form, is absolute (POSIX or Windows
 * drive style), or contains null bytes or other control characters.
 * Otherwise backslashes are converted to `/`, and empty components and `.`
 * components are dropped.
 *
 * @param path - The path to sanitize
 * @returns Sanitized relative path or null if path is deemed unsafe
 *
 * @example
 * ```typescript
 * sanitizePath('documents/file.txt'); // Returns: 'documents/file.txt'
 * sanitizePath('./a//b\\c.txt');      // Returns: 'a/b/c.txt'
 * sanitizePath('../etc/passwd');      // Returns: null
 * sanitizePath('/absolute/path');     // Returns: null
 * sanitizePath('docs/../secret');     // Returns: null
 * ```
 */
export function sanitizePath(path: string): string | null {
  if (!path || typeof path !== 'string') {
    return null
  }

  const trimmed = path.trim()

  // Check for dangerous patterns
  if (DANGEROUS_PATH_PATTERNS.some((pattern) => pattern.test(trimmed))) {
    return null
  }

  // Additional validation: no control characters
  // eslint-disable-next-line no-control-regex
  if (/[\x00-\x1f\x7f]/.test(trimmed)) {
    return null
  }

  // Normalize separators, then drop empty components and single dots
  return trimmed
    .replace(/\\/g, '/')
    .split('/')
    .filter((component) => component !== '' && component !== '.')
    .join('/')
}
|
|
334
|
+
|
|
335
|
+
/**
 * Reports whether a path passes `sanitizePath`, i.e. whether that function
 * returns something other than `null` for it.
 *
 * @param path - The path to validate
 * @returns True if the path is considered safe
 */
export function isPathSafe(path: string): boolean {
  const sanitized = sanitizePath(path)
  return sanitized !== null
}
|
|
344
|
+
|
|
345
|
+
// ============================================================================
|
|
346
|
+
// Content Type Detection
|
|
347
|
+
// ============================================================================
|
|
348
|
+
|
|
349
|
+
/**
 * Tells whether a string looks like it contains HTML markup.
 *
 * The check is a heuristic: it matches a `<` immediately followed by an ASCII
 * letter with a `>` appearing anywhere later in the string.
 *
 * @param content - The content to check
 * @returns True if content contains HTML tags; false for empty or non-string input
 */
export function containsHtml(content: string): boolean {
  if (typeof content !== 'string' || content === '') {
    return false
  }

  const tagLikePattern = /<[a-z][\s\S]*>/i
  return tagLikePattern.test(content)
}
|
|
363
|
+
|
|
364
|
+
/**
 * Tells whether a string contains script-like content.
 *
 * Looks for complete `<script>` elements, `javascript:` / `vbscript:` /
 * `data:text/html` URIs, and attribute-style event handlers (`on...=`).
 * All checks are case-insensitive.
 *
 * @param content - The content to check
 * @returns True if content contains script-like patterns; false for empty or non-string input
 */
export function containsScript(content: string): boolean {
  if (typeof content !== 'string' || content === '') {
    return false
  }

  // Built fresh on every call, so the global flag carries no state across calls.
  const scriptPatterns: RegExp[] = [
    /<script[\s\S]*?>[\s\S]*?<\/script>/gi,
    /javascript:/gi,
    /on\w+\s*=/gi, // Event handlers like onclick=, onerror=
    /data:text\/html/gi,
    /vbscript:/gi,
  ]

  for (const pattern of scriptPatterns) {
    if (pattern.test(content)) {
      return true
    }
  }

  return false
}
|
|
385
|
+
|
|
386
|
+
// ============================================================================
|
|
387
|
+
// Markdown Sanitization
|
|
388
|
+
// ============================================================================
|
|
389
|
+
|
|
390
|
+
/**
 * Sanitizes Markdown content by neutralizing potentially dangerous patterns.
 *
 * Two things are done, in order:
 * 1. Complete `<script>`, `<style>` and `<iframe>` elements embedded in the
 *    Markdown are removed (not escaped).
 * 2. Every inline link `[text](url)` and image `![alt](url)` has its URL run
 *    through `sanitizeUrl`. When the URL is accepted, the construct is
 *    re-emitted with the normalized URL; when it is rejected, the whole
 *    construct is replaced by just its text / alt text.
 *
 * Links and images are handled by one pattern with an optional leading `!`.
 * Handling them in two separate passes lets the link pattern match the
 * `[alt](url)` tail of an image and leave a stray `!` behind when the URL is
 * rejected.
 *
 * @param markdown - The Markdown content to sanitize
 * @returns Sanitized Markdown content, or `''` for empty or non-string input
 *
 * @example
 * ```typescript
 * sanitizeMarkdown('[x](javascript:alert(1))');  // Returns: 'x'
 * sanitizeMarkdown('![pic](javascript:void 0)'); // Returns: 'pic'
 * sanitizeMarkdown('[docs](/guide)');            // Returns: '[docs](/guide)'
 * ```
 */
export function sanitizeMarkdown(markdown: string): string {
  if (!markdown || typeof markdown !== 'string') {
    return ''
  }

  // Remove embedded elements that can execute or load active content
  const withoutActiveHtml = markdown
    .replace(/<script[\s\S]*?>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[\s\S]*?>[\s\S]*?<\/style>/gi, '')
    .replace(/<iframe[\s\S]*?>[\s\S]*?<\/iframe>/gi, '')

  // Sanitize URLs of links `[text](url)` and images `![alt](url)` in one pass
  return withoutActiveHtml.replace(
    /(!?)\[([^\]]*)\]\(([^)]*)\)/g,
    (_match: string, imageMarker: string, label: string, url: string): string => {
      const safeUrl = sanitizeUrl(url)
      return safeUrl ? `${imageMarker}[${label}](${safeUrl})` : label
    }
  )
}
|
|
425
|
+
|
|
426
|
+
// ============================================================================
|
|
427
|
+
// JSON Sanitization
|
|
428
|
+
// ============================================================================
|
|
429
|
+
|
|
430
|
+
/**
 * Deepest nesting level `sanitizeJsonObject` will descend to; guards against
 * DoS through extremely deep structures.
 */
const MAX_JSON_DEPTH = 10

/**
 * Produces a sanitized copy of a JSON-like value.
 *
 * - String values (object properties and array items) are passed through `stripHtml`.
 * - Nested objects and arrays are sanitized recursively.
 * - The keys `__proto__`, `constructor` and `prototype` are dropped.
 * - All other values are copied unchanged.
 * - Once `depth` exceeds `MAX_JSON_DEPTH`, an empty object is returned in
 *   place of the value at that level.
 *
 * @param obj - The object to sanitize
 * @param depth - Current recursion depth (internal use)
 * @returns Sanitized object
 */
export function sanitizeJsonObject<T extends Record<string, unknown>>(obj: T, depth = 0): T {
  if (depth > MAX_JSON_DEPTH) {
    return {} as T
  }

  if (obj === null || typeof obj !== 'object') {
    return obj
  }

  // Single rule applied to every array item and every property value.
  const sanitizeValue = (value: unknown): unknown => {
    if (typeof value === 'string') {
      return stripHtml(value)
    }
    if (typeof value === 'object' && value !== null) {
      return sanitizeJsonObject(value as Record<string, unknown>, depth + 1)
    }
    return value
  }

  if (Array.isArray(obj)) {
    return obj.map((item) => sanitizeValue(item)) as unknown as T
  }

  // Keys that are prototype pollution vectors are never copied.
  const blockedKeys = new Set(['__proto__', 'constructor', 'prototype'])
  const cleaned: Record<string, unknown> = {}

  Object.entries(obj).forEach(([key, value]) => {
    if (!blockedKeys.has(key)) {
      cleaned[key] = sanitizeValue(value)
    }
  })

  return cleaned as T
}
|
|
484
|
+
|
|
485
|
+
// ============================================================================
|
|
486
|
+
// Export All Functions
|
|
487
|
+
// ============================================================================
|
|
488
|
+
|
|
489
|
+
/**
 * Every public sanitization helper of this module gathered into one object,
 * exposed as the default export alongside the named exports.
 */
const sanitizationUtils = {
  sanitizeHtml,
  sanitizeForStorage,
  stripHtml,
  sanitizeUrl,
  isUrlSafe,
  sanitizePath,
  isPathSafe,
  containsHtml,
  containsScript,
  sanitizeMarkdown,
  sanitizeJsonObject,
}

export default sanitizationUtils
|