s3db.js 8.2.0 → 9.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,440 @@
1
+ /**
2
+ * Advanced metadata encoding for S3
3
+ * Pattern-specific optimizations for common data types
4
+ */
5
+
6
+ import { encode as toBase62, decode as fromBase62 } from './base62.js';
7
+
8
/**
 * Dictionary of frequently seen metadata values.
 *
 * Each lowercase word maps to a one-character code; advancedEncode() stores a
 * match as 'd' + code. The table is built on a null-prototype object so that a
 * lookup keyed by arbitrary text (for example 'constructor' or '__proto__')
 * can never resolve to a member inherited from Object.prototype, and it is
 * frozen because the codes are part of the stored format.
 */
const DICTIONARY = Object.freeze(Object.assign(Object.create(null), {
  // Status values
  'active': '\x01',
  'inactive': '\x02',
  'pending': '\x03',
  'completed': '\x04',
  'failed': '\x05',
  'deleted': '\x06',
  'archived': '\x07',
  'draft': '\x08',

  // Booleans
  'true': '\x10',
  'false': '\x11',
  'yes': '\x12',
  'no': '\x13',
  '1': '\x14',
  '0': '\x15',

  // HTTP methods (lowercase for matching)
  'get': '\x20',
  'post': '\x21',
  'put': '\x22',
  'delete': '\x23',
  'patch': '\x24',
  'head': '\x25',
  'options': '\x26',

  // Common words
  'enabled': '\x30',
  'disabled': '\x31',
  'success': '\x32',
  'error': '\x33',
  'warning': '\x34',
  'info': '\x35',
  'debug': '\x36',
  'critical': '\x37',

  // Null-like values
  'null': '\x40',
  'undefined': '\x41',
  'none': '\x42',
  'empty': '\x43',
  'nil': '\x44',
}));

/**
 * Reverse dictionary for decoding: one-character code -> lowercase word.
 * Also null-prototype and frozen, for the same reasons as DICTIONARY.
 */
const REVERSE_DICTIONARY = Object.freeze(Object.assign(
  Object.create(null),
  Object.fromEntries(Object.entries(DICTIONARY).map(([word, code]) => [code, word]))
));
59
+
60
/**
 * Report whether the text has the 8-4-4-4-12 hexadecimal UUID layout.
 * Letter case is ignored.
 *
 * @param {string} str - Candidate text.
 * @returns {boolean} True when the whole text is shaped like a UUID.
 */
function isUUID(str) {
  const uuidShape = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
  return uuidShape.test(str);
}
66
+
67
/**
 * Report whether the text is a hexadecimal string worth packing:
 * only hex digits, at least 8 characters, and an even number of them
 * (so it maps onto whole bytes).
 *
 * @param {string} str - Candidate text.
 * @returns {boolean} True when all three conditions hold.
 */
function isHexString(str) {
  const onlyHexDigits = /^[0-9a-f]+$/i;
  if (!onlyHexDigits.test(str)) return false;
  if (!(str.length >= 8)) return false;
  return str.length % 2 === 0;
}
73
+
74
/**
 * Detect if string is a numeric timestamp (Unix seconds or milliseconds).
 *
 * Accepted ranges:
 *   - seconds:      1000000000    .. 2000000000     (2001 .. 2033)
 *   - milliseconds: 1000000000000 .. 2000000000000  (2001 .. 2033)
 *
 * @param {string} str - Candidate text.
 * @returns {boolean} True when the text is a decimal number in one of the ranges.
 */
function isTimestamp(str) {
  // A leading zero is rejected: the value is stored as a number, so the zero
  // would be dropped and the decoded text would differ from the original.
  if (!/^[1-9]\d*$/.test(str)) return false;

  const num = Number.parseInt(str, 10);
  const isUnixSeconds = num >= 1000000000 && num <= 2000000000;
  const isUnixMillis = num >= 1000000000000 && num <= 2000000000000;
  return isUnixSeconds || isUnixMillis;
}
85
+
86
/**
 * Report whether the text is shaped like an ISO 8601 date-time:
 * YYYY-MM-DDTHH:mm:ss, optionally followed by .sss and by Z or a ±HH:MM offset.
 * Only the shape is checked, not whether the date actually exists.
 *
 * @param {string} str - Candidate text.
 * @returns {boolean} True when the whole text matches that shape.
 */
function isISOTimestamp(str) {
  const isoShape = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d{3})?(Z|[+-]\d{2}:\d{2})?$/;
  return isoShape.test(str);
}
93
+
94
/**
 * Detect if string is a non-negative integer that would benefit from base62,
 * i.e. its base62 form is shorter than its decimal text.
 *
 * Values that cannot be restored exactly from a number are rejected:
 *   - text with leading zeros (the zeros would be dropped), and
 *   - integers above Number.MAX_SAFE_INTEGER (digits would be rounded away).
 *
 * @param {string} str - Candidate text.
 * @returns {boolean} True when base62 is both lossless and shorter.
 */
function isBeneficialInteger(str) {
  if (!/^\d+$/.test(str)) return false;
  if (str.length > 1 && str[0] === '0') return false;

  const num = Number.parseInt(str, 10);
  if (!Number.isSafeInteger(num)) return false;

  // Only beneficial if base62 would be shorter
  return toBase62(num).length < str.length;
}
103
+
104
/**
 * Encode a value using pattern detection.
 *
 * The value is converted with String() and the first matching rule wins.
 * The first character of the result tells advancedDecode() which rule was used:
 *
 *   'd'  dictionary word (optional trailing 'U' = all-uppercase original)
 *   'i'  ISO 8601 UTC timestamp, stored as base62 milliseconds
 *        ('im' = original had milliseconds, 'is' = it did not)
 *   't'  numeric Unix timestamp in base62
 *   'u'  UUID as base64 of its 16 bytes
 *   'h'  hex string as base64 of its bytes
 *   'n'  integer in base62
 *   '='  printable ASCII stored verbatim
 *   'b'  base64 of the UTF-8 bytes (text with characters above U+00FF)
 *   '%'  URL-encoded text (remaining non-ASCII / control characters)
 *
 * A compact rule is applied only when decoding restores the exact original
 * text. Inputs a rule would alter (mixed-case dictionary words, uppercase
 * UUID/hex, ISO strings that are not in canonical UTC form, numbers with
 * leading zeros or beyond the safe-integer range) fall through to the
 * verbatim rules instead.
 *
 * @param {*} value - Value to encode; null and undefined map to dictionary codes.
 * @returns {{encoded: string, method: string, original?: string, savings?: string}}
 */
export function advancedEncode(value) {
  // Handle null and undefined
  if (value === null) return { encoded: 'd' + DICTIONARY['null'], method: 'dictionary' };
  if (value === undefined) return { encoded: 'd' + DICTIONARY['undefined'], method: 'dictionary' };

  const str = String(value);

  // Empty string
  if (str === '') return { encoded: '', method: 'none' };

  // Dictionary (most efficient). Own-property check: a plain lookup would also
  // match inherited members such as 'constructor'.
  const lowerStr = str.toLowerCase();
  if (Object.prototype.hasOwnProperty.call(DICTIONARY, lowerStr)) {
    const isLowerCase = str === lowerStr;
    const isUpperCase = !isLowerCase && str === lowerStr.toUpperCase();
    // Mixed case ('Active') cannot be restored by the decoder, so it is not
    // dictionary-encoded and falls through to the rules below.
    if (isLowerCase || isUpperCase) {
      return {
        encoded: 'd' + DICTIONARY[lowerStr] + (isUpperCase ? 'U' : ''),
        method: 'dictionary',
        original: str
      };
    }
  }

  // ISO timestamp: store the instant as base62 milliseconds
  if (isISOTimestamp(str)) {
    const unixMs = new Date(str).getTime();
    if (!Number.isNaN(unixMs)) {
      const hasMillis = str.includes('.');
      // Rebuild what the decoder will produce and use this rule only when it
      // equals the input (rejects offsets, zone-less and normalised dates).
      const iso = new Date(unixMs).toISOString();
      const restored = hasMillis ? iso : iso.replace('.000Z', 'Z');
      if (restored === str) {
        const encoded = toBase62(unixMs);
        // Flag for whether the original had milliseconds: m = with, s = without
        const flag = hasMillis ? 'm' : 's';
        return {
          encoded: 'i' + flag + encoded,
          method: 'iso-timestamp',
          original: str,
          savings: `${Math.round((1 - (encoded.length + 2)/str.length) * 100)}%`
        };
      }
    }
  }

  // Numeric timestamp with base62 (checked before hex)
  if (isTimestamp(str)) {
    const num = Number.parseInt(str, 10);
    // String(num) === str guards against leading zeros being dropped
    if (String(num) === str) {
      const encoded = toBase62(num);
      if (encoded.length < str.length) {
        return {
          encoded: 't' + encoded,
          method: 'timestamp',
          original: str,
          savings: `${Math.round((1 - encoded.length/str.length) * 100)}%`
        };
      }
    }
  }

  // The decoder emits lowercase hex, so only lowercase UUID/hex text is packed
  const isLowerCaseText = str === lowerStr;

  // UUID optimization: 36 chars -> 16 bytes
  if (isLowerCaseText && isUUID(str)) {
    const binary = Buffer.from(str.replace(/-/g, ''), 'hex');
    return {
      encoded: 'u' + binary.toString('base64'),
      method: 'uuid',
      original: str,
      savings: `${Math.round((1 - 24/36) * 100)}%` // base64 of 16 bytes = ~24 chars
    };
  }

  // Hex string optimization (MD5, SHA, ObjectId)
  if (isLowerCaseText && isHexString(str)) {
    const binary = Buffer.from(str, 'hex');
    return {
      encoded: 'h' + binary.toString('base64'),
      method: 'hex',
      original: str,
      savings: '33%' // hex to base64 is ~33% savings
    };
  }

  // Integer optimization with base62
  if (isBeneficialInteger(str)) {
    const num = Number.parseInt(str, 10);
    // Same exactness guard as for timestamps (leading zeros, unsafe integers)
    if (String(num) === str) {
      const encoded = toBase62(num);
      return {
        encoded: 'n' + encoded,
        method: 'number',
        original: str,
        savings: `${Math.round((1 - encoded.length/str.length) * 100)}%`
      };
    }
  }

  // Printable ASCII: '=' marks the text as stored verbatim so it cannot be
  // confused with one of the encoded forms
  if (/^[\x20-\x7E]*$/.test(str)) {
    return { encoded: '=' + str, method: 'none' };
  }

  // Characters above U+00FF (emoji, CJK, ...): base64 of the UTF-8 bytes
  if (/[^\x00-\xFF]/.test(str)) {
    return {
      encoded: 'b' + Buffer.from(str, 'utf8').toString('base64'),
      method: 'base64'
    };
  }

  // Latin-1 / control characters: URL encoding
  return {
    encoded: '%' + encodeURIComponent(str),
    method: 'url'
  };
}
225
+
226
/**
 * Decode base64 text, accepting it only when it is in canonical form.
 *
 * Buffer.from(text, 'base64') does not throw on malformed input, so the only
 * way to notice that a string is not really base64 is to re-encode the bytes
 * and compare.
 *
 * @param {string} text - Candidate base64 text.
 * @returns {Buffer|null} The decoded bytes, or null when the text is not canonical base64.
 */
function decodeCanonicalBase64(text) {
  const bytes = Buffer.from(text, 'base64');
  return bytes.toString('base64') === text ? bytes : null;
}

/**
 * Decode an advanced-encoded value.
 *
 * The first character selects the decoding rule (see advancedEncode). Anything
 * that is not a non-empty string, has an unknown prefix, or does not hold a
 * well-formed payload for its prefix is returned unchanged.
 *
 * @param {*} value - Stored value.
 * @returns {*} The decoded text, or the input itself when it is not decodable.
 */
export function advancedDecode(value) {
  // Covers '', null, undefined and every non-string
  if (!value || typeof value !== 'string') return value;

  const prefix = value[0];
  const content = value.slice(1);

  // A lone prefix character carries no payload, so it is not encoded
  if (content.length === 0 && prefix !== 'd') {
    return value;
  }

  switch (prefix) {
    case 'd': { // Dictionary
      // A trailing 'U' marks an all-uppercase original
      const isUpperCase = content.endsWith('U');
      const code = isUpperCase ? content.slice(0, -1) : content;
      // Own-property check: a plain lookup would return inherited members
      // (e.g. a function for the raw text 'dconstructor')
      if (!Object.prototype.hasOwnProperty.call(REVERSE_DICTIONARY, code)) return value;
      const word = REVERSE_DICTIONARY[code];
      return isUpperCase ? word.toUpperCase() : word;
    }

    case 'i': { // ISO timestamp
      const flag = content[0]; // 'm' = with millis, 's' = without
      if (flag !== 'm' && flag !== 's') return value;
      try {
        const unixMs = fromBase62(content.slice(1));
        // toISOString throws a RangeError for an invalid date
        const iso = new Date(unixMs).toISOString();
        return flag === 's' && iso.endsWith('.000Z') ? iso.replace('.000', '') : iso;
      } catch {
        return value;
      }
    }

    case 'u': { // UUID
      const bytes = decodeCanonicalBase64(content);
      // A UUID is exactly 16 bytes; anything else was not produced by the encoder
      if (bytes === null || bytes.length !== 16) return value;
      const hex = bytes.toString('hex');
      // Reconstruct UUID format
      return [
        hex.slice(0, 8),
        hex.slice(8, 12),
        hex.slice(12, 16),
        hex.slice(16, 20),
        hex.slice(20, 32)
      ].join('-');
    }

    case 'h': { // Hex string
      const bytes = decodeCanonicalBase64(content);
      return bytes === null ? value : bytes.toString('hex');
    }

    case 't': // Timestamp
    case 'n': // Number
      try {
        const num = fromBase62(content);
        // If decoding failed, return original
        if (typeof num !== 'number' || Number.isNaN(num)) return value;
        return String(num);
      } catch {
        return value;
      }

    case 'b': { // Base64 of UTF-8 bytes
      const bytes = decodeCanonicalBase64(content);
      return bytes === null ? value : bytes.toString('utf8');
    }

    case '%': // URL encoded
      try {
        return decodeURIComponent(content);
      } catch {
        // Malformed escape sequence
        return value;
      }

    case '=': // Unencoded ASCII
      return content;

    default:
      // No known prefix - return as is
      return value;
  }
}
325
+
326
/**
 * Measure how many UTF-8 bytes a value takes before and after advancedEncode().
 *
 * @param {*} value - Value to measure; its String() form is the baseline.
 * @returns {{original: number, encoded: number, method: string, savings: number, ratio: number}}
 *   Byte sizes, the encoding method used, the saving as a rounded percentage
 *   (0 for an empty baseline) and the encoded/original ratio (1 for an empty baseline).
 */
export function calculateAdvancedSize(value) {
  const { encoded, method } = advancedEncode(value);
  const utf8Length = (text) => Buffer.byteLength(text, 'utf8');

  const originalBytes = utf8Length(String(value));
  const encodedBytes = utf8Length(encoded);

  let savings = 0;
  let ratio = 1;
  if (originalBytes > 0) {
    savings = Math.round((1 - encodedBytes/originalBytes) * 100);
    ratio = encodedBytes / originalBytes;
  }

  return {
    original: originalBytes,
    encoded: encodedBytes,
    method,
    savings,
    ratio
  };
}
342
+
343
/**
 * Build a copy of a metadata object with its text values run through
 * advancedEncode().
 *
 * Per value: strings are encoded; Date instances are encoded from their
 * toISOString() form; arrays are mapped, encoding string items only; other
 * objects are processed recursively; everything else (numbers, booleans,
 * null, undefined, ...) is copied as is.
 *
 * @param {Object} metadata - Source object; non-objects are returned untouched.
 * @returns {Object} A new object with the same keys and encoded values.
 */
export function encodeMetadata(metadata) {
  if (!metadata || typeof metadata !== 'object') return metadata;

  const encodeText = (text) => advancedEncode(text).encoded;

  const encodeEntry = (entry) => {
    if (entry === null || entry === undefined) return entry;
    if (Array.isArray(entry)) {
      return entry.map((item) => (typeof item === 'string' ? encodeText(item) : item));
    }
    if (entry instanceof Date) return encodeText(entry.toISOString());
    if (typeof entry === 'object') return encodeMetadata(entry);
    if (typeof entry === 'string') return encodeText(entry);
    return entry;
  };

  const result = {};
  Object.entries(metadata).forEach(([field, entry]) => {
    result[field] = encodeEntry(entry);
  });
  return result;
}
374
+
375
/**
 * Build a copy of a metadata object with its text values run through
 * advancedDecode().
 *
 * Per value: strings are decoded; arrays are mapped, decoding string items
 * only; any other object is processed recursively; everything else
 * (numbers, booleans, null, undefined, ...) is copied as is.
 *
 * @param {Object} metadata - Stored object; non-objects are returned untouched.
 * @returns {Object} A new object with the same keys and decoded values.
 */
export function decodeMetadata(metadata) {
  if (!metadata || typeof metadata !== 'object') return metadata;

  const decodeEntry = (entry) => {
    if (entry === null || entry === undefined) return entry;
    if (Array.isArray(entry)) {
      return entry.map((item) => (typeof item === 'string' ? advancedDecode(item) : item));
    }
    if (typeof entry === 'object') return decodeMetadata(entry);
    if (typeof entry === 'string') return advancedDecode(entry);
    return entry;
  };

  const result = {};
  Object.entries(metadata).forEach(([field, entry]) => {
    result[field] = decodeEntry(entry);
  });
  return result;
}
404
+
405
/**
 * Encode every own enumerable value of an object with advancedEncode() and
 * report aggregate size statistics.
 *
 * @param {Object} obj - Flat object whose values should be encoded.
 * @returns {{optimized: Object, stats: {totalOriginal: number, totalOptimized: number, methods: Object, savings: number}}}
 *   `optimized` holds the encoded strings under the same keys. `stats` holds
 *   the UTF-8 byte totals before/after, a per-method usage count and the
 *   overall saving as a rounded percentage (0 when the input has no bytes).
 */
export function optimizeObjectValues(obj) {
  const optimized = {};
  const methods = {};
  let totalOriginal = 0;
  let totalOptimized = 0;

  Object.entries(obj).forEach(([field, raw]) => {
    const { encoded, method } = advancedEncode(raw);
    optimized[field] = encoded;

    totalOriginal += Buffer.byteLength(String(raw), 'utf8');
    totalOptimized += Buffer.byteLength(encoded, 'utf8');
    methods[method] = (methods[method] || 0) + 1;
  });

  const stats = { totalOriginal, totalOptimized, methods };
  stats.savings = totalOriginal > 0
    ? Math.round((1 - totalOptimized/totalOriginal) * 100)
    : 0;

  return { optimized, stats };
}
433
+
434
+ // Backwards compatibility exports: re-export the four functions above under "ultra*" alias names (presumably the names used by earlier releases - confirm against callers)
435
+ export {
436
+ advancedEncode as ultraEncode,
437
+ advancedDecode as ultraDecode,
438
+ calculateAdvancedSize as calculateUltraSize,
439
+ optimizeObjectValues as ultraOptimizeObject
440
+ };
@@ -1,3 +1,8 @@
1
+ // Memoization cache for calculateUTF8Bytes: maps a string to its previously computed UTF-8 byte count
2
+ // A Map keyed by the string itself; its size is capped (see UTF8_MEMORY_MAX_SIZE) so it cannot grow without bound
3
+ const utf8BytesMemory = new Map();
4
+ const UTF8_MEMORY_MAX_SIZE = 10000; // Entry cap: when the Map is full, the oldest-inserted half is evicted before a new entry is stored
5
+
1
6
  /**
2
7
  * Calculates the size in bytes of a string using UTF-8 encoding
3
8
  * @param {string} str - The string to calculate size for
@@ -8,6 +13,11 @@ export function calculateUTF8Bytes(str) {
8
13
  str = String(str);
9
14
  }
10
15
 
16
+ // Check memory first
17
+ if (utf8BytesMemory.has(str)) {
18
+ return utf8BytesMemory.get(str);
19
+ }
20
+
11
21
  let bytes = 0;
12
22
  for (let i = 0; i < str.length; i++) {
13
23
  const codePoint = str.codePointAt(i);
@@ -31,9 +41,35 @@ export function calculateUTF8Bytes(str) {
31
41
  }
32
42
  }
33
43
 
44
+ // Add to memory if under size limit
45
+ if (utf8BytesMemory.size < UTF8_MEMORY_MAX_SIZE) {
46
+ utf8BytesMemory.set(str, bytes);
47
+ } else if (utf8BytesMemory.size === UTF8_MEMORY_MAX_SIZE) {
48
+ // Simple LRU: clear half of memory when full
49
+ const entriesToDelete = Math.floor(UTF8_MEMORY_MAX_SIZE / 2);
50
+ let deleted = 0;
51
+ for (const key of utf8BytesMemory.keys()) {
52
+ if (deleted >= entriesToDelete) break;
53
+ utf8BytesMemory.delete(key);
54
+ deleted++;
55
+ }
56
+ utf8BytesMemory.set(str, bytes);
57
+ }
58
+
34
59
  return bytes;
35
60
  }
36
61
 
62
/**
 * Drop every entry from the memoized UTF-8 byte-count cache.
 * Intended for tests and for releasing memory on demand.
 *
 * @returns {void}
 */
export function clearUTF8Memory() {
  utf8BytesMemory.clear();
}
68
+
69
+ // Aliases for backward compatibility: both names below refer to the same function as clearUTF8Memory
70
+ export const clearUTF8Memo = clearUTF8Memory;
71
+ export const clearUTF8Cache = clearUTF8Memory;
72
+
37
73
  /**
38
74
  * Calculates the size in bytes of attribute names (mapped to digits)
39
75
  * @param {Object} mappedObject - The object returned by schema.mapper()