s3db.js 8.2.0 → 9.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,244 @@
1
+ /**
2
+ * Metadata encoding for S3
3
+ * Chooses optimal encoding based on content analysis
4
+ */
5
+
6
/**
 * Classify a string by the kinds of characters it contains and recommend an
 * encoding strategy for storing it as S3 metadata.
 *
 * Each UTF-16 code unit falls into exactly one bucket:
 *  - printable ASCII (0x20-0x7E), which needs no encoding;
 *  - Latin-1 extended (0x80-0xFF);
 *  - "multibyte": everything else, i.e. code units above 0xFF plus control
 *    characters (below 0x20) and DEL (0x7F), which are lumped in here
 *    because they always need encoding.
 *
 * @param {string} str - String to analyze
 * @returns {{type: string, safe: boolean, reason?: string,
 *   stats?: {ascii: number, latin1: number, multibyte: number}}}
 *   `type` is 'none' (empty or non-string input), 'ascii', 'url' or 'base64'.
 */
export function analyzeString(str) {
  if (!str || typeof str !== 'string') {
    return { type: 'none', safe: true };
  }

  let asciiCount = 0;
  let latin1Count = 0;
  let multibyteCount = 0;

  for (let i = 0; i < str.length; i++) {
    const code = str.charCodeAt(i);
    if (code >= 0x20 && code <= 0x7E) {
      asciiCount++;
    } else if (code >= 0x80 && code <= 0xFF) {
      latin1Count++;
    } else {
      // Control characters, DEL and anything above 0xFF.
      multibyteCount++;
    }
  }

  // Pure printable ASCII - no encoding needed.
  if (latin1Count === 0 && multibyteCount === 0) {
    return {
      type: 'ascii',
      safe: true,
      stats: { ascii: asciiCount, latin1: 0, multibyte: 0 }
    };
  }

  if (multibyteCount > 0) {
    const stats = { ascii: asciiCount, latin1: latin1Count, multibyte: multibyteCount };
    // Above 30% multibyte content, base64 is treated as the cheaper option;
    // below that, URL encoding keeps the ASCII portion readable.
    if (multibyteCount / str.length > 0.3) {
      return { type: 'base64', safe: false, reason: 'high multibyte content', stats };
    }
    return { type: 'url', safe: false, reason: 'contains multibyte characters', stats };
  }

  // Only Latin-1 extended characters remain: base64 when they make up more
  // than half of the string, URL encoding otherwise.
  const stats = { ascii: asciiCount, latin1: latin1Count, multibyte: 0 };
  if (latin1Count / str.length > 0.5) {
    return { type: 'base64', safe: false, reason: 'high Latin-1 content', stats };
  }
  return { type: 'url', safe: false, reason: 'contains Latin-1 extended characters', stats };
}
96
+
97
/**
 * Tell whether a printable-ASCII string would be altered by metadataDecode
 * if it were stored verbatim.
 *
 * That happens when the text starts with one of the encoding prefixes
 * ('u:' / 'b:') followed by content, or when it passes the decoder's legacy
 * "unprefixed base64" detection (base64 alphabet, decodes to text containing
 * non-ASCII characters, and re-encodes to the same string).
 */
function needsDisambiguation(text) {
  if (text.length > 2 && (text.startsWith('u:') || text.startsWith('b:'))) {
    return true;
  }
  if (!/^[A-Za-z0-9+/]+=*$/.test(text)) {
    return false;
  }
  const decoded = Buffer.from(text, 'base64').toString('utf8');
  return /[^\x00-\x7F]/.test(decoded) &&
    Buffer.from(decoded, 'utf8').toString('base64') === text;
}

/**
 * Encode a value for S3 metadata.
 *
 * `null` and `undefined` become the literal strings 'null' / 'undefined'
 * (encoding 'special'). Any other value is stringified, analyzed with
 * analyzeString, and then either stored verbatim, URL-encoded behind a 'u:'
 * prefix, or base64-encoded behind a 'b:' prefix.
 *
 * Two cases deviate from the analysis so that metadataDecode always returns
 * the original string:
 *  - printable-ASCII text that the decoder would misread (see
 *    needsDisambiguation) is wrapped as 'b:' base64 instead of being stored
 *    verbatim;
 *  - text that encodeURIComponent rejects with a URIError (lone surrogates)
 *    falls back to 'b:' base64 instead of throwing.
 *
 * @param {*} value - Value to encode
 * @returns {{encoded: string, encoding: string, analysis?: Object}}
 *   `encoding` is 'special', 'none', 'url' or 'base64'; `analysis` is the
 *   analyzeString result (absent for 'special').
 */
export function metadataEncode(value) {
  // Preserve null and undefined as special string values
  if (value === null) {
    return { encoded: 'null', encoding: 'special' };
  }
  if (value === undefined) {
    return { encoded: 'undefined', encoding: 'special' };
  }

  const stringValue = String(value);
  const analysis = analyzeString(stringValue);

  const asBase64 = () => ({
    encoded: 'b:' + Buffer.from(stringValue, 'utf8').toString('base64'),
    encoding: 'base64',
    analysis
  });

  switch (analysis.type) {
    case 'none':
    case 'ascii':
      if (needsDisambiguation(stringValue)) {
        return asBase64();
      }
      // No encoding needed
      return { encoded: stringValue, encoding: 'none', analysis };

    case 'url':
      try {
        return {
          encoded: 'u:' + encodeURIComponent(stringValue),
          encoding: 'url',
          analysis
        };
      } catch (err) {
        if (!(err instanceof URIError)) {
          throw err;
        }
        // Lone surrogate: cannot be URL-encoded, so store it as base64.
        return asBase64();
      }

    case 'base64':
    default:
      // Unknown analysis types also fall back to base64 for safety
      return asBase64();
  }
}
149
+
150
/**
 * Decode a base64 payload only if it really is base64-encoded UTF-8 text.
 *
 * Buffer.from(..., 'base64') never throws; it silently skips characters it
 * does not understand. Validity is therefore checked explicitly: the payload
 * must use the standard alphabet with at most two padding characters, and
 * re-encoding the decoded text must reproduce it (padding ignored).
 *
 * @param {string} payload - Text after the 'b:' prefix
 * @returns {string|null} Decoded text, or null when the payload is not valid
 */
function decodeBase64Payload(payload) {
  if (!/^[A-Za-z0-9+/]+={0,2}$/.test(payload)) {
    return null;
  }
  const decoded = Buffer.from(payload, 'base64').toString('utf8');
  const reencoded = Buffer.from(decoded, 'utf8').toString('base64');
  const stripPadding = (text) => text.replace(/=+$/, '');
  return stripPadding(reencoded) === stripPadding(payload) ? decoded : null;
}

/**
 * Decode a string read from S3 metadata.
 *
 *  - 'null' / 'undefined' decode to the corresponding values.
 *  - Non-string input is returned unchanged.
 *  - 'u:<text>' is URL-decoded and 'b:<text>' is base64-decoded; if the
 *    payload is empty or malformed, the original value is returned untouched.
 *  - Unprefixed values that look like base64 and decode to text containing
 *    non-ASCII characters are treated as legacy base64; anything else is
 *    returned as-is.
 *
 * @param {*} value - Value to decode
 * @returns {*} Decoded value
 */
export function metadataDecode(value) {
  // Handle special values
  if (value === 'null') {
    return null;
  }
  if (value === 'undefined') {
    return undefined;
  }
  if (typeof value !== 'string') {
    return value;
  }

  if (value.startsWith('u:')) {
    if (value.length === 2) return value; // Just "u:" without content
    try {
      return decodeURIComponent(value.substring(2));
    } catch {
      // Malformed percent-escape: hand back the stored value unchanged
      return value;
    }
  }

  if (value.startsWith('b:')) {
    if (value.length === 2) return value; // Just "b:" without content
    const decoded = decodeBase64Payload(value.substring(2));
    return decoded === null ? value : decoded;
  }

  // No prefix (backwards compatibility): accept legacy unprefixed base64 only
  // when it decodes to text with non-ASCII characters and round-trips exactly.
  if (value.length > 0 && /^[A-Za-z0-9+/]+=*$/.test(value)) {
    const decoded = Buffer.from(value, 'base64').toString('utf8');
    if (/[^\x00-\x7F]/.test(decoded) && Buffer.from(decoded, 'utf8').toString('base64') === value) {
      return decoded;
    }
  }

  return value;
}
208
+
209
+ /**
210
+ * Calculate the encoded size for a given value
211
+ * @param {string} value - Value to calculate size for
212
+ * @returns {Object} Size information
213
+ */
214
+ // Backwards compatibility exports
215
+ export { metadataEncode as smartEncode, metadataDecode as smartDecode };
216
+
217
+ export function calculateEncodedSize(value) {
218
+ const analysis = analyzeString(value);
219
+ const originalSize = Buffer.byteLength(value, 'utf8');
220
+
221
+ let encodedSize;
222
+ switch (analysis.type) {
223
+ case 'none':
224
+ case 'ascii':
225
+ encodedSize = originalSize;
226
+ break;
227
+ case 'url':
228
+ encodedSize = 2 + encodeURIComponent(value).length; // 'u:' prefix
229
+ break;
230
+ case 'base64':
231
+ encodedSize = 2 + Buffer.from(value, 'utf8').toString('base64').length; // 'b:' prefix
232
+ break;
233
+ default:
234
+ encodedSize = 2 + Buffer.from(value, 'utf8').toString('base64').length;
235
+ }
236
+
237
+ return {
238
+ original: originalSize,
239
+ encoded: encodedSize,
240
+ overhead: encodedSize - originalSize,
241
+ ratio: encodedSize / originalSize,
242
+ encoding: analysis.type
243
+ };
244
+ }
@@ -0,0 +1,130 @@
1
+ /**
2
+ * Optimized encoding for S3 metadata without prefixes where possible
3
+ * Uses heuristics to minimize overhead while maintaining reliability
4
+ */
5
+
6
/**
 * Check if a string looks like base64.
 *
 * The string must be at least 4 characters long, have a length that is a
 * multiple of 4, and consist of the standard base64 alphabet followed by at
 * most two '=' padding characters (longer padding runs are never produced
 * by a base64 encoder).
 *
 * @param {string} str - Candidate string
 * @returns {boolean} True when the string has the shape of base64 output
 */
function looksLikeBase64(str) {
  if (typeof str !== 'string' || str.length < 4 || str.length % 4 !== 0) {
    return false;
  }
  return /^[A-Za-z0-9+/]+={0,2}$/.test(str);
}
14
+
15
/**
 * Smart encode with minimal overhead.
 *
 * Output forms, chosen so that optimizedDecode can tell them apart:
 *  - 'null' / 'undefined' for those two values, '' for the empty string;
 *  - printable ASCII is returned verbatim, unless the decoder would misread
 *    it (it starts with the '!' or '%' marker, or it looks like base64), in
 *    which case it is prefixed with '!';
 *  - text containing characters above 0xFF is returned as bare base64;
 *  - text containing control characters but no non-ASCII characters is
 *    always URL-encoded behind '%', because the decoder only accepts bare
 *    base64 whose decoded text contains non-ASCII characters;
 *  - remaining text (with Latin-1 extended characters) uses whichever of
 *    '%' + URL encoding or bare base64 is shorter.
 *
 * @param {*} value - Value to encode
 * @returns {string} Encoded string
 */
export function optimizedEncode(value) {
  // Handle special values
  if (value === null) return 'null';
  if (value === undefined) return 'undefined';

  const str = String(value);

  // Empty string
  if (str === '') return '';

  // Pure printable ASCII
  if (/^[\x20-\x7E]*$/.test(str)) {
    const startsWithMarker = str.startsWith('!') || str.startsWith('%');
    if (startsWithMarker || looksLikeBase64(str)) {
      // Escape with '!' so the decoder strips the marker instead of
      // interpreting the content.
      return '!' + str;
    }
    return str;
  }

  if (/[^\x00-\xFF]/.test(str)) {
    // Has emoji/CJK - must use base64
    return Buffer.from(str, 'utf8').toString('base64');
  }

  const urlEncoded = encodeURIComponent(str);

  if (!/[^\x00-\x7F]/.test(str)) {
    // ASCII plus control characters: bare base64 of this text would be
    // left undecoded by optimizedDecode, so URL encoding is the only option.
    return '%' + urlEncoded;
  }

  // Latin-1 extended content - use whichever representation is shorter
  const base64 = Buffer.from(str, 'utf8').toString('base64');
  if (urlEncoded.length <= base64.length) {
    return '%' + urlEncoded; // % prefix for URL encoded
  }

  return base64;
}
57
+
58
/**
 * Smart decode with minimal overhead.
 *
 * Reverses the marker scheme: 'null' / 'undefined' map back to those values;
 * a leading '!' is stripped; a leading '%' means the rest is URL-encoded;
 * text that looks like base64 is decoded only when the result contains
 * non-ASCII characters and re-encodes to the same text. Everything else is
 * returned as a string unchanged.
 *
 * @param {*} value - Stored value
 * @returns {*} Decoded value
 */
export function optimizedDecode(value) {
  switch (value) {
    case 'null':
      return null;
    case 'undefined':
      return undefined;
    case '':
    case null:
    case undefined:
      return value;
    default:
      break;
  }

  const text = String(value);
  const marker = text.charAt(0);

  // '!' marks ASCII that merely looked like base64
  if (marker === '!') {
    return text.slice(1);
  }

  // '%' marks URL-encoded content; fall back to the raw text if malformed
  if (marker === '%') {
    try {
      return decodeURIComponent(text.slice(1));
    } catch {
      return text;
    }
  }

  if (!looksLikeBase64(text)) {
    return text;
  }

  try {
    const candidate = Buffer.from(text, 'base64').toString('utf8');
    const containsNonAscii = /[^\x00-\x7F]/.test(candidate);
    // Accept only when the decoded text round-trips to the stored text
    if (containsNonAscii && Buffer.from(candidate, 'utf8').toString('base64') === text) {
      return candidate;
    }
  } catch {
    // Not base64
  }

  return text;
}
102
+
103
/**
 * Compare encoding strategies for a value.
 *
 * Reports the UTF-8 byte size of the stringified value next to the length of
 * each candidate encoding (base64 and URL encoding, with and without their
 * two-character prefixes) and of the optimizedEncode output, plus a label
 * for the method optimizedEncode ended up using.
 *
 * @param {*} value - Value to measure
 * @returns {Object} Sizes per strategy and the `optimizedMethod` label
 */
export function compareEncodings(value) {
  const text = String(value);

  const plainBase64 = Buffer.from(text, 'utf8').toString('base64');
  const plainUrl = encodeURIComponent(text);
  const optimized = optimizedEncode(value);

  let optimizedMethod;
  if (optimized === text) {
    optimizedMethod = 'none';
  } else if (optimized.startsWith('!')) {
    optimizedMethod = 'ascii-marked';
  } else if (optimized.startsWith('%')) {
    optimizedMethod = 'url';
  } else if (looksLikeBase64(optimized)) {
    optimizedMethod = 'base64';
  } else {
    optimizedMethod = 'unknown';
  }

  return {
    original: Buffer.byteLength(text, 'utf8'),
    base64Pure: plainBase64.length,
    base64Prefixed: ('b:' + plainBase64).length,
    urlPure: plainUrl.length,
    urlPrefixed: ('u:' + plainUrl).length,
    optimized: optimized.length,
    optimizedMethod
  };
}