@onlineapps/content-resolver 1.0.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +103 -11
  2. package/package.json +2 -2
  3. package/src/index.js +300 -54
package/README.md CHANGED
@@ -1,12 +1,21 @@
1
1
  # @onlineapps/content-resolver
2
2
 
3
- Automatic conversion between text content and storage references.
3
+ Automatic conversion between text content and storage references with **Content Descriptor Pattern**.
4
+
5
+ ## Overview
6
+
7
+ ContentResolver provides a unified API for working with:
8
+ - **Inline strings** - Small text content (< 16KB)
9
+ - **File references** - Large content stored in MinIO
10
+ - **Binary files** - PDFs, images, etc.
11
+
12
+ All content is represented as **Content Descriptors** with consistent metadata (filename, content_type, size, fingerprint).
4
13
 
5
14
  ## Threshold
6
15
 
7
16
  Default threshold: **16 KB (16384 bytes)**
8
17
 
9
- Content larger than threshold is automatically stored in MinIO and replaced with a reference.
18
+ Content larger than threshold is automatically stored in MinIO and returned as a Descriptor.
10
19
 
11
20
  ## Usage
12
21
 
@@ -29,9 +38,16 @@ const resolver = new ContentResolver({
29
38
  const content = await resolver.resolve('minio://workflow/path/to/file.txt');
30
39
  // → Returns actual file content
31
40
 
32
- // Store large content as reference
33
- const result = await resolver.store(largeText, { workflow_id: 'wf-123' });
34
- // → { value: 'minio://workflow/...', stored: true, size: 50000 }
41
+ // Store large content - returns Content Descriptor
42
+ const descriptor = await resolver.store(largeText, { workflow_id: 'wf-123' }, 'document.html');
43
+ // → {
44
+ // type: 'file',
45
+ // storage_ref: 'minio://workflow/...',
46
+ // filename: 'document.html',
47
+ // content_type: 'text/html',
48
+ // size: 50000,
49
+ // fingerprint: 'sha256...'
50
+ // }
35
51
  ```
36
52
 
37
53
  ### In Business Service Handler
@@ -71,26 +87,102 @@ exports.processDocument = async (input, context = {}) => {
71
87
  If value is a reference (`minio://...`), downloads and returns content.
72
88
  Otherwise returns value unchanged.
73
89
 
74
- #### `store(content, context, filename?): Promise<Object>`
75
- If content size > threshold, stores in MinIO and returns reference.
76
- Returns `{ value, stored, size, fingerprint? }`.
90
+ #### `store(content, context, filename?, content_type?): Promise<Object>`
91
+ Stores content and returns a **Content Descriptor**. Text larger than the threshold, and all Buffer content, is stored in MinIO.
92
+ Returns Descriptor with `type: 'inline'` or `type: 'file'`.
93
+
94
+ #### `getAsBuffer(value): Promise<Buffer>`
95
+ Unified API to get content as Buffer. Accepts:
96
+ - Plain string → converts to Buffer
97
+ - Storage reference (`minio://...`) → downloads and returns Buffer
98
+ - Content Descriptor → extracts content as Buffer
99
+
100
+ #### `getAsString(value): Promise<string>`
101
+ Unified API to get content as string. Works with string, reference, or Descriptor.
102
+
103
+ #### `getMetadata(value): Object`
104
+ Get metadata (filename, content_type, size, fingerprint) from any value type.
105
+
106
+ #### `createDescriptor(content, options): Promise<Object>`
107
+ Creates a Content Descriptor from raw content. Automatically decides between inline and file storage.
108
+
109
+ #### `normalizeToDescriptor(value, options): Promise<Object>`
110
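+ Normalizes any value (string, reference, or existing Descriptor) to a Content Descriptor. Other values, including Buffers, are coerced with `String()` first, so pass binary data to `createDescriptor` instead.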
77
111
 
78
112
  #### `resolveInput(input, fields?): Promise<Object>`
79
- Resolves all reference fields in input object.
113
+ Resolves all reference fields in input object (legacy method).
80
114
 
81
115
  #### `storeOutput(output, context, fields?): Promise<Object>`
82
- Stores large content fields as references.
116
+ Converts every string or Buffer field to a Content Descriptor; large or binary content is stored in MinIO (fields become Descriptors, not plain strings).
117
+
118
+ ## Content Descriptor Pattern
119
+
120
+ ### Descriptor Structure
121
+
122
+ ```javascript
123
+ // Inline content (small)
124
+ {
125
+ type: 'inline',
126
+ content: 'actual text content',
127
+ encoding: 'utf-8',
128
+ filename: 'document.txt',
129
+ content_type: 'text/plain',
130
+ size: 1234,
131
+ fingerprint: 'sha256...'
132
+ }
133
+
134
+ // File content (large or binary)
135
+ {
136
+ type: 'file',
137
+ storage_ref: 'minio://workflow/path/to/file',
138
+ filename: 'invoice.pdf',
139
+ content_type: 'application/pdf',
140
+ size: 56607,
141
+ fingerprint: 'sha256...'
142
+ }
143
+ ```
144
+
145
+ ### Usage Example
146
+
147
+ ```javascript
148
+ const resolver = new ContentResolver();
149
+
150
+ // Work with attachments - unified API
151
+ async function processAttachment(attachment) {
152
+ // Get content as Buffer (works with string, reference, or Descriptor)
153
+ const buffer = await resolver.getAsBuffer(attachment.value);
154
+
155
+ // Get metadata
156
+ const meta = resolver.getMetadata(attachment.value);
157
+ console.log(`Processing ${meta.filename} (${meta.size} bytes)`);
158
+
159
+ // Process buffer...
160
+ return processed;
161
+ }
162
+ ```
83
163
 
84
164
  ### Utilities
85
165
 
86
166
  ```javascript
87
- const { isReference, parseReference, DEFAULT_THRESHOLD } = require('@onlineapps/content-resolver');
167
+ const {
168
+ isReference,
169
+ parseReference,
170
+ isDescriptor,
171
+ getContentType,
172
+ DEFAULT_THRESHOLD
173
+ } = require('@onlineapps/content-resolver');
88
174
 
89
175
  isReference('minio://bucket/path'); // true
90
176
  isReference('plain text'); // false
91
177
 
178
+ isDescriptor({ type: 'file', storage_ref: '...' }); // true
179
+ isDescriptor('plain string'); // false
180
+
92
181
  parseReference('minio://workflow/content/file.txt');
93
182
  // → { bucket: 'workflow', path: 'content/file.txt' }
183
+
184
+ getContentType('invoice.pdf'); // 'application/pdf'
185
+ getContentType('document.html'); // 'text/html'
94
186
  ```
95
187
 
96
188
  ## Reference Formats
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@onlineapps/content-resolver",
3
- "version": "1.0.2",
4
- "description": "Automatic conversion between text content and storage references",
3
+ "version": "1.1.1",
4
+ "description": "Automatic conversion between text content and storage references with Content Descriptor pattern",
5
5
  "main": "src/index.js",
6
6
  "scripts": {
7
7
  "test": "jest"
package/src/index.js CHANGED
@@ -2,13 +2,15 @@
2
2
  * ContentResolver - Automatic conversion between text content and storage references
3
3
  *
4
4
  * Handles transparent conversion:
5
- * - Large text → stored as file, returns reference
5
+ * - Large text → stored as file, returns Descriptor
6
6
  * - Reference → downloads content, returns text
7
+ * - Unified Content Descriptor pattern for transparent file/string handling
7
8
  *
8
9
  * Used by business services to handle both inline content and file references uniformly.
9
10
  */
10
11
 
11
12
  const StorageConnector = require('@onlineapps/conn-base-storage');
13
+ const crypto = require('crypto');
12
14
 
13
15
  // Default threshold: 16KB (16384 bytes)
14
16
  const DEFAULT_THRESHOLD = 16 * 1024;
@@ -17,6 +19,50 @@ const DEFAULT_THRESHOLD = 16 * 1024;
17
19
  const MINIO_REF_PATTERN = /^minio:\/\/([^/]+)\/(.+)$/;
18
20
  const INTERNAL_REF_PATTERN = /^internal:\/\/storage\/(.+)$/;
19
21
 
22
+ /**
23
+ * Check if a value is a Content Descriptor
24
+ * @param {*} value - Value to check
25
+ * @returns {boolean} True if value is a Descriptor object
26
+ */
27
+ function isDescriptor(value) {
28
+ if (!value || typeof value !== 'object' || Array.isArray(value)) {
29
+ return false;
30
+ }
31
+ return value.type === 'inline' || value.type === 'file';
32
+ }
33
+
34
+ /**
35
+ * Get content type from filename or content
36
+ * @param {string} filename - Filename with extension
37
+ * @param {string|Buffer} content - Content to analyze
38
+ * @returns {string} MIME type
39
+ */
40
+ function getContentType(filename, content) {
41
+ if (filename) {
42
+ const ext = filename.split('.').pop()?.toLowerCase();
43
+ const types = {
44
+ 'html': 'text/html',
45
+ 'htm': 'text/html',
46
+ 'md': 'text/markdown',
47
+ 'txt': 'text/plain',
48
+ 'json': 'application/json',
49
+ 'pdf': 'application/pdf',
50
+ 'png': 'image/png',
51
+ 'jpg': 'image/jpeg',
52
+ 'jpeg': 'image/jpeg',
53
+ 'gif': 'image/gif',
54
+ 'svg': 'image/svg+xml'
55
+ };
56
+ if (types[ext]) return types[ext];
57
+ }
58
+
59
+ // Fallback
60
+ if (typeof content === 'string' && content.trim().startsWith('<')) {
61
+ return 'text/html';
62
+ }
63
+ return 'text/plain';
64
+ }
65
+
20
66
  /**
21
67
  * Check if a value is a storage reference
22
68
  * @param {string} value - Value to check
@@ -138,57 +184,32 @@ class ContentResolver {
138
184
  }
139
185
 
140
186
  /**
141
- * Store content if above threshold, otherwise return as-is
142
- * @param {string} content - Text content to potentially store
187
+ * Store content and return a Content Descriptor (file-backed if above threshold or binary, inline otherwise)
188
+ * @param {string|Buffer} content - Text or binary content to potentially store
143
189
  * @param {Object} context - Workflow context for path generation
144
190
  * @param {string} [context.workflow_id] - Workflow ID
145
191
  * @param {string} [context.step_id] - Step ID
146
192
  * @param {string} [filename] - Optional filename hint
147
- * @returns {Promise<{ value: string, stored: boolean, size: number }>}
193
+ * @param {string} [content_type] - Optional MIME type
194
+ * @returns {Promise<Object>} Content Descriptor
148
195
  */
149
- async store(content, context = {}, filename = null) {
150
- if (!content || typeof content !== 'string') {
151
- return { value: content, stored: false, size: 0 };
152
- }
153
-
154
- const size = Buffer.byteLength(content, 'utf-8');
155
-
156
- // Below threshold - return as-is
157
- if (size <= this.threshold) {
158
- return { value: content, stored: false, size };
159
- }
160
-
161
- // Above threshold - store in MinIO
162
- try {
163
- const storage = await this.getStorage();
164
-
165
- const workflowId = context.workflow_id || 'standalone';
166
- const stepId = context.step_id || 'content';
167
- const pathPrefix = `content/${workflowId}/${stepId}`;
168
-
169
- const buffer = Buffer.from(content, 'utf-8');
170
- const result = await storage.uploadWithFingerprint(
171
- 'workflow',
172
- buffer,
173
- pathPrefix,
174
- filename ? filename.split('.').pop() : 'txt'
175
- );
176
-
177
- const ref = `minio://workflow/${result.path}`;
178
- this.logger.debug?.(`[ContentResolver] Stored ${size} bytes as ${ref}`);
179
-
180
- return {
181
- value: ref,
182
- stored: true,
183
- size,
184
- fingerprint: result.fingerprint
196
+ async store(content, context = {}, filename = null, content_type = null) {
197
+ if (!content) {
198
+ return {
199
+ type: 'inline',
200
+ content: '',
201
+ encoding: 'utf-8',
202
+ filename: filename || 'empty.txt',
203
+ content_type: content_type || 'text/plain',
204
+ size: 0
185
205
  };
186
- } catch (error) {
187
- this.logger.error(`[ContentResolver] Failed to store content: ${error.message}`);
188
- // Fallback: return original content if storage fails
189
- this.logger.warn('[ContentResolver] Falling back to inline content');
190
- return { value: content, stored: false, size, warning: error.message };
191
206
  }
207
+
208
+ return await this.createDescriptor(content, {
209
+ filename,
210
+ content_type,
211
+ context
212
+ });
192
213
  }
193
214
 
194
215
  /**
@@ -215,11 +236,11 @@ class ContentResolver {
215
236
  }
216
237
 
217
238
  /**
218
- * Process an output object - store large string fields as references
239
+ * Process an output object - store large string fields as Descriptors
219
240
  * @param {Object} output - Output object with potential large content
220
241
  * @param {Object} context - Workflow context
221
242
  * @param {string[]} [fields] - Specific fields to process (default: all string fields)
222
- * @returns {Promise<Object>} Output with large content stored as references
243
+ * @returns {Promise<Object>} Output with large content stored as Descriptors
223
244
  */
224
245
  async storeOutput(output, context = {}, fields = null) {
225
246
  if (!output || typeof output !== 'object') {
@@ -230,13 +251,9 @@ class ContentResolver {
230
251
  const fieldsToProcess = fields || Object.keys(output);
231
252
 
232
253
  for (const field of fieldsToProcess) {
233
- if (typeof result[field] === 'string') {
234
- const stored = await this.store(result[field], context, field);
235
- result[field] = stored.value;
236
- if (stored.stored) {
237
- result[`${field}_stored`] = true;
238
- result[`${field}_size`] = stored.size;
239
- }
254
+ if (typeof result[field] === 'string' || Buffer.isBuffer(result[field])) {
255
+ const descriptor = await this.store(result[field], context, field);
256
+ result[field] = descriptor;
240
257
  }
241
258
  }
242
259
 
@@ -258,6 +275,233 @@ class ContentResolver {
258
275
  setThreshold(bytes) {
259
276
  this.threshold = bytes;
260
277
  }
278
+
279
+ /**
280
+ * Download content from storage reference as Buffer
281
+ * @private
282
+ * @param {string} ref - Storage reference
283
+ * @returns {Promise<Buffer>} File content as Buffer
284
+ */
285
+ async downloadAsBuffer(ref) {
286
+ const parsed = parseReference(ref);
287
+ if (!parsed) {
288
+ throw new Error(`Invalid storage reference: ${ref}`);
289
+ }
290
+
291
+ const storage = await this.getStorage();
292
+ const stream = await storage.client.getObject(parsed.bucket, parsed.path);
293
+
294
+ const chunks = [];
295
+ for await (const chunk of stream) {
296
+ chunks.push(chunk);
297
+ }
298
+
299
+ return Buffer.concat(chunks);
300
+ }
301
+
302
+ /**
303
+ * Get content as Buffer - unified API for string, reference, or Descriptor
304
+ * @param {string|Object} value - String, reference, or Content Descriptor
305
+ * @returns {Promise<Buffer>} Content as Buffer
306
+ */
307
+ async getAsBuffer(value) {
308
+ // Plain string (backward compatibility)
309
+ if (typeof value === 'string') {
310
+ if (isReference(value)) {
311
+ return await this.downloadAsBuffer(value);
312
+ }
313
+ return Buffer.from(value, 'utf-8');
314
+ }
315
+
316
+ // Content Descriptor
317
+ if (isDescriptor(value)) {
318
+ if (value.type === 'file') {
319
+ return await this.downloadAsBuffer(value.storage_ref);
320
+ }
321
+ // type === 'inline'
322
+ const encoding = value.encoding || 'utf-8';
323
+ return Buffer.from(value.content, encoding);
324
+ }
325
+
326
+ // Fallback: try to convert to string
327
+ return Buffer.from(String(value), 'utf-8');
328
+ }
329
+
330
+ /**
331
+ * Get content as string - unified API for string, reference, or Descriptor
332
+ * @param {string|Object} value - String, reference, or Content Descriptor
333
+ * @returns {Promise<string>} Content as string
334
+ */
335
+ async getAsString(value) {
336
+ const buffer = await this.getAsBuffer(value);
337
+ return buffer.toString('utf-8');
338
+ }
339
+
340
+ /**
341
+ * Get metadata from value - unified API
342
+ * @param {string|Object} value - String, reference, or Content Descriptor
343
+ * @returns {Object} Metadata object with filename, content_type, size, fingerprint
344
+ */
345
+ getMetadata(value) {
346
+ // Plain string
347
+ if (typeof value === 'string') {
348
+ return {
349
+ filename: 'content.txt',
350
+ content_type: 'text/plain',
351
+ size: Buffer.byteLength(value, 'utf-8')
352
+ };
353
+ }
354
+
355
+ // Content Descriptor
356
+ if (isDescriptor(value)) {
357
+ return {
358
+ filename: value.filename || 'content',
359
+ content_type: value.content_type || 'text/plain',
360
+ size: value.size || 0,
361
+ fingerprint: value.fingerprint || null
362
+ };
363
+ }
364
+
365
+ // Fallback
366
+ const str = String(value);
367
+ return {
368
+ filename: 'content.txt',
369
+ content_type: 'text/plain',
370
+ size: Buffer.byteLength(str, 'utf-8')
371
+ };
372
+ }
373
+
374
+ /**
375
+ * Create Content Descriptor from raw content
376
+ * @param {string|Buffer} content - Content to create descriptor for
377
+ * @param {Object} options - Options
378
+ * @param {string} [options.filename] - Filename hint
379
+ * @param {string} [options.content_type] - MIME type
380
+ * @param {Object} [options.context] - Workflow context
381
+ * @param {boolean} [options.forceFile=false] - Force storage as file even if small
382
+ * @returns {Promise<Object>} Content Descriptor
383
+ */
384
+ async createDescriptor(content, options = {}) {
385
+ const { filename, content_type, context = {}, forceFile = false } = options;
386
+
387
+ // Classify input: Buffers are kept as binary; anything else is coerced to a string
388
+ let contentString;
389
+ let isBinary = false;
390
+ if (Buffer.isBuffer(content)) {
391
+ isBinary = true;
392
+ // For binary, we'll store it directly
393
+ } else if (typeof content === 'string') {
394
+ contentString = content;
395
+ } else {
396
+ contentString = String(content);
397
+ }
398
+
399
+ // For binary content, always store as file
400
+ if (isBinary || forceFile) {
401
+ const storage = await this.getStorage();
402
+ const workflowId = context.workflow_id || 'standalone';
403
+ const stepId = context.step_id || 'content';
404
+ const pathPrefix = `content/${workflowId}/${stepId}`;
405
+
406
+ const buffer = Buffer.isBuffer(content) ? content : Buffer.from(contentString, 'utf-8');
407
+ const ext = filename ? filename.split('.').pop() : 'bin';
408
+
409
+ const result = await storage.uploadWithFingerprint(
410
+ 'workflow',
411
+ buffer,
412
+ pathPrefix,
413
+ ext
414
+ );
415
+
416
+ const finalFilename = filename || `${result.fingerprint.slice(0, 8)}.${ext}`;
417
+ const finalContentType = content_type || getContentType(finalFilename, buffer);
418
+
419
+ return {
420
+ type: 'file',
421
+ storage_ref: `minio://workflow/${result.path}`,
422
+ filename: finalFilename,
423
+ content_type: finalContentType,
424
+ size: buffer.length,
425
+ fingerprint: result.fingerprint
426
+ };
427
+ }
428
+
429
+ // For text content, check threshold
430
+ const size = Buffer.byteLength(contentString, 'utf-8');
431
+ const finalContentType = content_type || getContentType(filename, contentString);
432
+
433
+ if (size > this.threshold) {
434
+ // Store as file
435
+ const storage = await this.getStorage();
436
+ const workflowId = context.workflow_id || 'standalone';
437
+ const stepId = context.step_id || 'content';
438
+ const pathPrefix = `content/${workflowId}/${stepId}`;
439
+
440
+ const buffer = Buffer.from(contentString, 'utf-8');
441
+ const ext = filename ? filename.split('.').pop() : 'txt';
442
+
443
+ const result = await storage.uploadWithFingerprint(
444
+ 'workflow',
445
+ buffer,
446
+ pathPrefix,
447
+ ext
448
+ );
449
+
450
+ const finalFilename = filename || `${result.fingerprint.slice(0, 8)}.${ext}`;
451
+
452
+ return {
453
+ type: 'file',
454
+ storage_ref: `minio://workflow/${result.path}`,
455
+ filename: finalFilename,
456
+ content_type: finalContentType,
457
+ size: buffer.length,
458
+ fingerprint: result.fingerprint
459
+ };
460
+ }
461
+
462
+ // Small content - return as inline
463
+ const finalFilename = filename || `content_${Date.now()}.txt`;
464
+ const fingerprint = crypto.createHash('sha256').update(contentString).digest('hex');
465
+
466
+ return {
467
+ type: 'inline',
468
+ content: contentString,
469
+ encoding: 'utf-8',
470
+ filename: finalFilename,
471
+ content_type: finalContentType,
472
+ size: size,
473
+ fingerprint: fingerprint
474
+ };
475
+ }
476
+
477
+ /**
478
+ * Normalize value to Content Descriptor if needed
479
+ * @param {string|Object} value - String, reference, or Descriptor
480
+ * @param {Object} options - Options for descriptor creation
481
+ * @returns {Promise<Object>} Content Descriptor
482
+ */
483
+ async normalizeToDescriptor(value, options = {}) {
484
+ // Already a Descriptor
485
+ if (isDescriptor(value)) {
486
+ return value;
487
+ }
488
+
489
+ // Plain string or reference
490
+ if (typeof value === 'string') {
491
+ if (isReference(value)) {
492
+ // Download and create descriptor
493
+ const buffer = await this.downloadAsBuffer(value);
494
+ const parsed = parseReference(value);
495
+ const filename = parsed ? parsed.path.split('/').pop() : 'file';
496
+ return await this.createDescriptor(buffer, { ...options, filename, forceFile: true });
497
+ }
498
+ // Plain string - create descriptor
499
+ return await this.createDescriptor(value, options);
500
+ }
501
+
502
+ // Fallback
503
+ return await this.createDescriptor(String(value), options);
504
+ }
261
505
  }
262
506
 
263
507
  // Export class and utilities
@@ -265,5 +509,7 @@ module.exports = ContentResolver;
265
509
  module.exports.ContentResolver = ContentResolver;
266
510
  module.exports.isReference = isReference;
267
511
  module.exports.parseReference = parseReference;
512
+ module.exports.isDescriptor = isDescriptor;
513
+ module.exports.getContentType = getContentType;
268
514
  module.exports.DEFAULT_THRESHOLD = DEFAULT_THRESHOLD;
269
515