@semiont/content 0.3.5 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1 -3
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -105,7 +105,7 @@ var FilesystemRepresentationStore = class {
|
|
|
105
105
|
logger;
|
|
106
106
|
constructor(project, logger) {
|
|
107
107
|
this.logger = logger;
|
|
108
|
-
this.basePath = project.dataDir;
|
|
108
|
+
this.basePath = project.representationsDir;
|
|
109
109
|
}
|
|
110
110
|
async store(content, metadata) {
|
|
111
111
|
const checksum = calculateChecksum(content);
|
|
@@ -119,7 +119,6 @@ var FilesystemRepresentationStore = class {
|
|
|
119
119
|
const cd = checksum.substring(2, 4);
|
|
120
120
|
const filePath = path.join(
|
|
121
121
|
this.basePath,
|
|
122
|
-
"representations",
|
|
123
122
|
mediaTypePath,
|
|
124
123
|
ab,
|
|
125
124
|
cd,
|
|
@@ -159,7 +158,6 @@ var FilesystemRepresentationStore = class {
|
|
|
159
158
|
const cd = checksum.substring(2, 4);
|
|
160
159
|
const filePath = path.join(
|
|
161
160
|
this.basePath,
|
|
162
|
-
"representations",
|
|
163
161
|
mediaTypePath,
|
|
164
162
|
ab,
|
|
165
163
|
cd,
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/representation-store.ts","../src/checksum.ts","../src/mime-extensions.ts"],"sourcesContent":["/**\n * RepresentationStore - Content-addressed storage for byte-level resource representations\n *\n * Handles storage and retrieval of concrete byte-level renditions of resources.\n * Uses content-addressed storage where the checksum IS the filename.\n * Supports multiple storage backends (filesystem, S3, IPFS, etc.)\n *\n * Storage structure (filesystem):\n * basePath/representations/{mediaType}/{ab}/{cd}/rep-{checksum}{extension}\n *\n * Where:\n * - {mediaType} is base MIME type with \"/\" encoded as \"~1\" (e.g., \"text~1markdown\")\n * - {ab}/{cd} are first 4 hex digits of checksum for sharding\n * - {checksum} is the raw SHA-256 hex hash (e.g., \"5aaa0b72abc123...\")\n * - {extension} is derived from base MIME type (.md, .txt, .png, etc.)\n *\n * Example:\n * For content with checksum \"5aaa0b72abc123...\" and mediaType \"text/markdown; charset=iso-8859-1\":\n * - Storage path: basePath/representations/text~1markdown/5a/aa/rep-5aaa0b72abc123....md\n * - Stored mediaType: \"text/markdown; charset=iso-8859-1\" (full type with charset preserved)\n *\n * Character Encoding:\n * - Charset parameters in mediaType are preserved in metadata (e.g., \"text/plain; charset=iso-8859-1\")\n * - Storage path uses only base MIME type (strips charset for directory structure)\n * - Content stored as raw bytes - charset only affects decoding on retrieval\n *\n * This design provides:\n * - O(1) content retrieval by checksum + mediaType\n * - Automatic deduplication (identical content = same file)\n * - Idempotent storage operations\n * - Proper file extensions for filesystem browsing\n * - Faithful preservation of character encoding metadata\n */\n\nimport { promises as fs } from 'fs';\nimport path from 'path';\nimport type { SemiontProject } from '@semiont/core/node';\nimport type { Logger } from '@semiont/core';\nimport { calculateChecksum } from 
'./checksum';\nimport { getExtensionForMimeType } from './mime-extensions';\n\n/**\n * Metadata for a representation being stored\n */\nexport interface RepresentationMetadata {\n mediaType: string; // REQUIRED - MIME type\n filename?: string;\n encoding?: string;\n language?: string;\n rel?: 'original' | 'thumbnail' | 'preview' | 'optimized' | 'derived' | 'other';\n}\n\n/**\n * Complete representation information\n */\nexport interface StoredRepresentation extends RepresentationMetadata {\n '@id': string; // Representation ID (same as checksum)\n byteSize: number; // Size in bytes\n checksum: string; // Raw SHA-256 hex hash\n created: string; // ISO 8601 timestamp\n}\n\n/**\n * Interface for representation storage backends\n */\nexport interface RepresentationStore {\n /**\n * Store content and return representation metadata\n *\n * @param content - Raw bytes to store\n * @param metadata - Representation metadata\n * @returns Complete representation info with checksum\n */\n store(content: Buffer, metadata: RepresentationMetadata): Promise<StoredRepresentation>;\n\n /**\n * Retrieve content by checksum (content-addressed lookup)\n *\n * @param checksum - Content checksum as raw hex (e.g., \"5aaa0b72...\")\n * @param mediaType - MIME type (e.g., \"text/markdown\")\n * @returns Raw bytes\n */\n retrieve(checksum: string, mediaType: string): Promise<Buffer>;\n}\n\n/**\n * Filesystem implementation of RepresentationStore\n */\nexport class FilesystemRepresentationStore implements RepresentationStore {\n private basePath: string;\n private logger?: Logger;\n\n constructor(project: SemiontProject, logger?: Logger) {\n this.logger = logger;\n this.basePath = project.dataDir;\n }\n\n async store(content: Buffer, metadata: RepresentationMetadata): Promise<StoredRepresentation> {\n // Compute checksum (raw hex) - this will be used as the content address\n const checksum = calculateChecksum(content);\n\n // Strip charset/parameters for path - only use base MIME type for 
directory structure\n // e.g., \"text/plain; charset=iso-8859-1\" -> \"text/plain\"\n const baseMediaType = metadata.mediaType.split(';')[0]!.trim();\n const mediaTypePath = this.encodeMediaType(baseMediaType);\n const extension = getExtensionForMimeType(baseMediaType);\n\n if (!checksum || checksum.length < 4) {\n throw new Error(`Invalid checksum: ${checksum}`);\n }\n\n // Use first 4 hex digits for sharding: 5a/aa\n const ab = checksum.substring(0, 2);\n const cd = checksum.substring(2, 4);\n\n // Build file path using raw hex checksum as filename with proper extension\n const filePath = path.join(\n this.basePath,\n 'representations',\n mediaTypePath,\n ab,\n cd,\n `rep-${checksum}${extension}`\n );\n\n this.logger?.debug('Storing representation', {\n checksum,\n mediaType: baseMediaType,\n byteSize: content.length,\n filename: metadata.filename\n });\n\n // Create directory structure programmatically\n await fs.mkdir(path.dirname(filePath), { recursive: true });\n\n // Write content (idempotent - same content = same file)\n await fs.writeFile(filePath, content);\n\n this.logger?.info('Representation stored', {\n checksum,\n mediaType: baseMediaType,\n byteSize: content.length,\n path: filePath\n });\n\n return {\n '@id': checksum, // Use checksum as the ID (content-addressed)\n ...metadata,\n byteSize: content.length,\n checksum,\n created: new Date().toISOString(),\n };\n }\n\n async retrieve(checksum: string, mediaType: string): Promise<Buffer> {\n // Strip charset/parameters for path - only use base MIME type for directory lookup\n // e.g., \"text/plain; charset=iso-8859-1\" -> \"text/plain\"\n const baseMediaType = mediaType.split(';')[0]!.trim();\n const mediaTypePath = this.encodeMediaType(baseMediaType);\n const extension = getExtensionForMimeType(baseMediaType);\n\n if (!checksum || checksum.length < 4) {\n throw new Error(`Invalid checksum: ${checksum}`);\n }\n\n // Use first 4 hex digits for sharding: 5a/aa\n const ab = checksum.substring(0, 2);\n 
const cd = checksum.substring(2, 4);\n\n // Build file path from raw hex checksum with proper extension\n const filePath = path.join(\n this.basePath,\n 'representations',\n mediaTypePath,\n ab,\n cd,\n `rep-${checksum}${extension}`\n );\n\n this.logger?.debug('Retrieving representation', {\n checksum,\n mediaType: baseMediaType\n });\n\n try {\n const content = await fs.readFile(filePath);\n this.logger?.info('Representation retrieved', {\n checksum,\n mediaType: baseMediaType,\n byteSize: content.length,\n path: filePath\n });\n return content;\n } catch (error: any) {\n if (error.code === 'ENOENT') {\n this.logger?.warn('Representation not found', {\n checksum,\n mediaType: baseMediaType,\n path: filePath\n });\n throw new Error(`Representation not found for checksum ${checksum} with mediaType ${mediaType}`);\n }\n this.logger?.error('Failed to retrieve representation', {\n checksum,\n mediaType: baseMediaType,\n error: error.message,\n path: filePath\n });\n throw error;\n }\n }\n\n /**\n * Encode media type for filesystem path\n * Replaces \"/\" with \"~1\" to avoid directory separators\n *\n * @param mediaType - MIME type (e.g., \"text/markdown\")\n * @returns Encoded path segment (e.g., \"text~1markdown\")\n */\n private encodeMediaType(mediaType: string): string {\n return mediaType.replace(/\\//g, '~1');\n }\n}\n","/**\n * Checksum utilities for content verification\n */\n\nimport { createHash } from 'crypto';\n\n/**\n * Calculate SHA-256 checksum of content\n * @param content The content to hash\n * @returns Hex-encoded SHA-256 hash\n */\nexport function calculateChecksum(content: string | Buffer): string {\n const hash = createHash('sha256');\n hash.update(content);\n return hash.digest('hex');\n}\n\n/**\n * Verify content against a checksum\n * @param content The content to verify\n * @param checksum The expected checksum\n * @returns True if content matches checksum\n */\nexport function verifyChecksum(content: string | Buffer, checksum: string): 
boolean {\n return calculateChecksum(content) === checksum;\n}\n","/**\n * MIME Type to File Extension Mapping\n *\n * Maps common MIME types to their standard file extensions.\n * Used by RepresentationStore to save files with proper extensions.\n */\n\n/**\n * Comprehensive MIME type to extension mapping\n */\nconst MIME_TO_EXTENSION: Record<string, string> = {\n // Text formats\n 'text/plain': '.txt',\n 'text/markdown': '.md',\n 'text/html': '.html',\n 'text/css': '.css',\n 'text/csv': '.csv',\n 'text/xml': '.xml',\n\n // Application formats - structured data\n 'application/json': '.json',\n 'application/xml': '.xml',\n 'application/yaml': '.yaml',\n 'application/x-yaml': '.yaml',\n\n // Application formats - documents\n 'application/pdf': '.pdf',\n 'application/msword': '.doc',\n 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',\n 'application/vnd.ms-excel': '.xls',\n 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',\n 'application/vnd.ms-powerpoint': '.ppt',\n 'application/vnd.openxmlformats-officedocument.presentationml.presentation': '.pptx',\n\n // Application formats - archives\n 'application/zip': '.zip',\n 'application/gzip': '.gz',\n 'application/x-tar': '.tar',\n 'application/x-7z-compressed': '.7z',\n\n // Application formats - executables/binaries\n 'application/octet-stream': '.bin',\n 'application/wasm': '.wasm',\n\n // Image formats\n 'image/png': '.png',\n 'image/jpeg': '.jpg',\n 'image/gif': '.gif',\n 'image/webp': '.webp',\n 'image/svg+xml': '.svg',\n 'image/bmp': '.bmp',\n 'image/tiff': '.tiff',\n 'image/x-icon': '.ico',\n\n // Audio formats\n 'audio/mpeg': '.mp3',\n 'audio/wav': '.wav',\n 'audio/ogg': '.ogg',\n 'audio/webm': '.webm',\n 'audio/aac': '.aac',\n 'audio/flac': '.flac',\n\n // Video formats\n 'video/mp4': '.mp4',\n 'video/mpeg': '.mpeg',\n 'video/webm': '.webm',\n 'video/ogg': '.ogv',\n 'video/quicktime': '.mov',\n 'video/x-msvideo': '.avi',\n\n // Programming 
languages\n 'text/javascript': '.js',\n 'application/javascript': '.js',\n 'text/x-typescript': '.ts',\n 'application/typescript': '.ts',\n 'text/x-python': '.py',\n 'text/x-java': '.java',\n 'text/x-c': '.c',\n 'text/x-c++': '.cpp',\n 'text/x-csharp': '.cs',\n 'text/x-go': '.go',\n 'text/x-rust': '.rs',\n 'text/x-ruby': '.rb',\n 'text/x-php': '.php',\n 'text/x-swift': '.swift',\n 'text/x-kotlin': '.kt',\n 'text/x-shell': '.sh',\n\n // Font formats\n 'font/woff': '.woff',\n 'font/woff2': '.woff2',\n 'font/ttf': '.ttf',\n 'font/otf': '.otf',\n};\n\n/**\n * Get file extension for a MIME type\n *\n * @param mediaType - MIME type (e.g., \"text/markdown\")\n * @returns File extension with leading dot (e.g., \".md\") or \".dat\" if unknown\n *\n * @example\n * getExtensionForMimeType('text/markdown') // => '.md'\n * getExtensionForMimeType('image/png') // => '.png'\n * getExtensionForMimeType('unknown/type') // => '.dat'\n */\nexport function getExtensionForMimeType(mediaType: string): string {\n // Normalize MIME type (lowercase, remove parameters)\n const normalized = mediaType.toLowerCase().split(';')[0]!.trim();\n\n // Look up in mapping\n const extension = MIME_TO_EXTENSION[normalized];\n\n // Return mapped extension or fallback to .dat\n return extension || '.dat';\n}\n\n/**\n * Check if a MIME type has a known extension mapping\n *\n * @param mediaType - MIME type to check\n * @returns true if extension is known, false if would fallback to .dat\n */\nexport function hasKnownExtension(mediaType: string): boolean {\n const normalized = mediaType.toLowerCase().split(';')[0]!.trim();\n return normalized in 
MIME_TO_EXTENSION;\n}\n"],"mappings":";AAkCA,SAAS,YAAY,UAAU;AAC/B,OAAO,UAAU;;;AC/BjB,SAAS,kBAAkB;AAOpB,SAAS,kBAAkB,SAAkC;AAClE,QAAM,OAAO,WAAW,QAAQ;AAChC,OAAK,OAAO,OAAO;AACnB,SAAO,KAAK,OAAO,KAAK;AAC1B;AAQO,SAAS,eAAe,SAA0B,UAA2B;AAClF,SAAO,kBAAkB,OAAO,MAAM;AACxC;;;ACfA,IAAM,oBAA4C;AAAA;AAAA,EAEhD,cAAc;AAAA,EACd,iBAAiB;AAAA,EACjB,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,YAAY;AAAA,EACZ,YAAY;AAAA;AAAA,EAGZ,oBAAoB;AAAA,EACpB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,sBAAsB;AAAA;AAAA,EAGtB,mBAAmB;AAAA,EACnB,sBAAsB;AAAA,EACtB,2EAA2E;AAAA,EAC3E,4BAA4B;AAAA,EAC5B,qEAAqE;AAAA,EACrE,iCAAiC;AAAA,EACjC,6EAA6E;AAAA;AAAA,EAG7E,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,qBAAqB;AAAA,EACrB,+BAA+B;AAAA;AAAA,EAG/B,4BAA4B;AAAA,EAC5B,oBAAoB;AAAA;AAAA,EAGpB,aAAa;AAAA,EACb,cAAc;AAAA,EACd,aAAa;AAAA,EACb,cAAc;AAAA,EACd,iBAAiB;AAAA,EACjB,aAAa;AAAA,EACb,cAAc;AAAA,EACd,gBAAgB;AAAA;AAAA,EAGhB,cAAc;AAAA,EACd,aAAa;AAAA,EACb,aAAa;AAAA,EACb,cAAc;AAAA,EACd,aAAa;AAAA,EACb,cAAc;AAAA;AAAA,EAGd,aAAa;AAAA,EACb,cAAc;AAAA,EACd,cAAc;AAAA,EACd,aAAa;AAAA,EACb,mBAAmB;AAAA,EACnB,mBAAmB;AAAA;AAAA,EAGnB,mBAAmB;AAAA,EACnB,0BAA0B;AAAA,EAC1B,qBAAqB;AAAA,EACrB,0BAA0B;AAAA,EAC1B,iBAAiB;AAAA,EACjB,eAAe;AAAA,EACf,YAAY;AAAA,EACZ,cAAc;AAAA,EACd,iBAAiB;AAAA,EACjB,aAAa;AAAA,EACb,eAAe;AAAA,EACf,eAAe;AAAA,EACf,cAAc;AAAA,EACd,gBAAgB;AAAA,EAChB,iBAAiB;AAAA,EACjB,gBAAgB;AAAA;AAAA,EAGhB,aAAa;AAAA,EACb,cAAc;AAAA,EACd,YAAY;AAAA,EACZ,YAAY;AACd;AAaO,SAAS,wBAAwB,WAA2B;AAEjE,QAAM,aAAa,UAAU,YAAY,EAAE,MAAM,GAAG,EAAE,CAAC,EAAG,KAAK;AAG/D,QAAM,YAAY,kBAAkB,UAAU;AAG9C,SAAO,aAAa;AACtB;AAQO,SAAS,kBAAkB,WAA4B;AAC5D,QAAM,aAAa,UAAU,YAAY,EAAE,MAAM,GAAG,EAAE,CAAC,EAAG,KAAK;AAC/D,SAAO,cAAc;AACvB;;;AFtCO,IAAM,gCAAN,MAAmE;AAAA,EAChE;AAAA,EACA;AAAA,EAER,YAAY,SAAyB,QAAiB;AACpD,SAAK,SAAS;AACd,SAAK,WAAW,QAAQ;AAAA,EAC1B;AAAA,EAEA,MAAM,MAAM,SAAiB,UAAiE;AAE5F,UAAM,WAAW,kBAAkB,OAAO;AAI1C,UAAM,gBAAgB,SAAS,UAAU,MAAM,GAAG,EAAE,CAAC,EAAG,KAAK;AAC7D,UAAM,gBAAgB,KAAK,gBAAgB,aAAa;AACxD,UAAM,YAAY,wBAAwB,aAAa;AAEvD,QAAI,CAAC,YAAY,SAAS,SAAS,GAAG;AACpC,YAAM,IAAI,MAAM,qBAAqB,QAAQ,EAAE;AAAA,IACjD;
AAGA,UAAM,KAAK,SAAS,UAAU,GAAG,CAAC;AAClC,UAAM,KAAK,SAAS,UAAU,GAAG,CAAC;AAGlC,UAAM,WAAW,KAAK;AAAA,MACpB,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,OAAO,QAAQ,GAAG,SAAS;AAAA,IAC7B;AAEA,SAAK,QAAQ,MAAM,0BAA0B;AAAA,MAC3C;AAAA,MACA,WAAW;AAAA,MACX,UAAU,QAAQ;AAAA,MAClB,UAAU,SAAS;AAAA,IACrB,CAAC;AAGD,UAAM,GAAG,MAAM,KAAK,QAAQ,QAAQ,GAAG,EAAE,WAAW,KAAK,CAAC;AAG1D,UAAM,GAAG,UAAU,UAAU,OAAO;AAEpC,SAAK,QAAQ,KAAK,yBAAyB;AAAA,MACzC;AAAA,MACA,WAAW;AAAA,MACX,UAAU,QAAQ;AAAA,MAClB,MAAM;AAAA,IACR,CAAC;AAED,WAAO;AAAA,MACL,OAAO;AAAA;AAAA,MACP,GAAG;AAAA,MACH,UAAU,QAAQ;AAAA,MAClB;AAAA,MACA,UAAS,oBAAI,KAAK,GAAE,YAAY;AAAA,IAClC;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,UAAkB,WAAoC;AAGnE,UAAM,gBAAgB,UAAU,MAAM,GAAG,EAAE,CAAC,EAAG,KAAK;AACpD,UAAM,gBAAgB,KAAK,gBAAgB,aAAa;AACxD,UAAM,YAAY,wBAAwB,aAAa;AAEvD,QAAI,CAAC,YAAY,SAAS,SAAS,GAAG;AACpC,YAAM,IAAI,MAAM,qBAAqB,QAAQ,EAAE;AAAA,IACjD;AAGA,UAAM,KAAK,SAAS,UAAU,GAAG,CAAC;AAClC,UAAM,KAAK,SAAS,UAAU,GAAG,CAAC;AAGlC,UAAM,WAAW,KAAK;AAAA,MACpB,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,OAAO,QAAQ,GAAG,SAAS;AAAA,IAC7B;AAEA,SAAK,QAAQ,MAAM,6BAA6B;AAAA,MAC9C;AAAA,MACA,WAAW;AAAA,IACb,CAAC;AAED,QAAI;AACF,YAAM,UAAU,MAAM,GAAG,SAAS,QAAQ;AAC1C,WAAK,QAAQ,KAAK,4BAA4B;AAAA,QAC5C;AAAA,QACA,WAAW;AAAA,QACX,UAAU,QAAQ;AAAA,QAClB,MAAM;AAAA,MACR,CAAC;AACD,aAAO;AAAA,IACT,SAAS,OAAY;AACnB,UAAI,MAAM,SAAS,UAAU;AAC3B,aAAK,QAAQ,KAAK,4BAA4B;AAAA,UAC5C;AAAA,UACA,WAAW;AAAA,UACX,MAAM;AAAA,QACR,CAAC;AACD,cAAM,IAAI,MAAM,yCAAyC,QAAQ,mBAAmB,SAAS,EAAE;AAAA,MACjG;AACA,WAAK,QAAQ,MAAM,qCAAqC;AAAA,QACtD;AAAA,QACA,WAAW;AAAA,QACX,OAAO,MAAM;AAAA,QACb,MAAM;AAAA,MACR,CAAC;AACD,YAAM;AAAA,IACR;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASQ,gBAAgB,WAA2B;AACjD,WAAO,UAAU,QAAQ,OAAO,IAAI;AAAA,EACtC;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/representation-store.ts","../src/checksum.ts","../src/mime-extensions.ts"],"sourcesContent":["/**\n * RepresentationStore - Content-addressed storage for byte-level resource representations\n *\n * Handles storage and retrieval of concrete byte-level renditions of resources.\n * Uses content-addressed storage where the checksum IS the filename.\n * Supports multiple storage backends (filesystem, S3, IPFS, etc.)\n *\n * Storage structure (filesystem):\n * basePath/representations/{mediaType}/{ab}/{cd}/rep-{checksum}{extension}\n *\n * Where:\n * - {mediaType} is base MIME type with \"/\" encoded as \"~1\" (e.g., \"text~1markdown\")\n * - {ab}/{cd} are first 4 hex digits of checksum for sharding\n * - {checksum} is the raw SHA-256 hex hash (e.g., \"5aaa0b72abc123...\")\n * - {extension} is derived from base MIME type (.md, .txt, .png, etc.)\n *\n * Example:\n * For content with checksum \"5aaa0b72abc123...\" and mediaType \"text/markdown; charset=iso-8859-1\":\n * - Storage path: basePath/representations/text~1markdown/5a/aa/rep-5aaa0b72abc123....md\n * - Stored mediaType: \"text/markdown; charset=iso-8859-1\" (full type with charset preserved)\n *\n * Character Encoding:\n * - Charset parameters in mediaType are preserved in metadata (e.g., \"text/plain; charset=iso-8859-1\")\n * - Storage path uses only base MIME type (strips charset for directory structure)\n * - Content stored as raw bytes - charset only affects decoding on retrieval\n *\n * This design provides:\n * - O(1) content retrieval by checksum + mediaType\n * - Automatic deduplication (identical content = same file)\n * - Idempotent storage operations\n * - Proper file extensions for filesystem browsing\n * - Faithful preservation of character encoding metadata\n */\n\nimport { promises as fs } from 'fs';\nimport path from 'path';\nimport type { SemiontProject } from '@semiont/core/node';\nimport type { Logger } from '@semiont/core';\nimport { calculateChecksum } from 
'./checksum';\nimport { getExtensionForMimeType } from './mime-extensions';\n\n/**\n * Metadata for a representation being stored\n */\nexport interface RepresentationMetadata {\n mediaType: string; // REQUIRED - MIME type\n filename?: string;\n encoding?: string;\n language?: string;\n rel?: 'original' | 'thumbnail' | 'preview' | 'optimized' | 'derived' | 'other';\n}\n\n/**\n * Complete representation information\n */\nexport interface StoredRepresentation extends RepresentationMetadata {\n '@id': string; // Representation ID (same as checksum)\n byteSize: number; // Size in bytes\n checksum: string; // Raw SHA-256 hex hash\n created: string; // ISO 8601 timestamp\n}\n\n/**\n * Interface for representation storage backends\n */\nexport interface RepresentationStore {\n /**\n * Store content and return representation metadata\n *\n * @param content - Raw bytes to store\n * @param metadata - Representation metadata\n * @returns Complete representation info with checksum\n */\n store(content: Buffer, metadata: RepresentationMetadata): Promise<StoredRepresentation>;\n\n /**\n * Retrieve content by checksum (content-addressed lookup)\n *\n * @param checksum - Content checksum as raw hex (e.g., \"5aaa0b72...\")\n * @param mediaType - MIME type (e.g., \"text/markdown\")\n * @returns Raw bytes\n */\n retrieve(checksum: string, mediaType: string): Promise<Buffer>;\n}\n\n/**\n * Filesystem implementation of RepresentationStore\n */\nexport class FilesystemRepresentationStore implements RepresentationStore {\n private basePath: string;\n private logger?: Logger;\n\n constructor(project: SemiontProject, logger?: Logger) {\n this.logger = logger;\n this.basePath = project.representationsDir;\n }\n\n async store(content: Buffer, metadata: RepresentationMetadata): Promise<StoredRepresentation> {\n // Compute checksum (raw hex) - this will be used as the content address\n const checksum = calculateChecksum(content);\n\n // Strip charset/parameters for path - only use base MIME 
type for directory structure\n // e.g., \"text/plain; charset=iso-8859-1\" -> \"text/plain\"\n const baseMediaType = metadata.mediaType.split(';')[0]!.trim();\n const mediaTypePath = this.encodeMediaType(baseMediaType);\n const extension = getExtensionForMimeType(baseMediaType);\n\n if (!checksum || checksum.length < 4) {\n throw new Error(`Invalid checksum: ${checksum}`);\n }\n\n // Use first 4 hex digits for sharding: 5a/aa\n const ab = checksum.substring(0, 2);\n const cd = checksum.substring(2, 4);\n\n // Build file path using raw hex checksum as filename with proper extension\n const filePath = path.join(\n this.basePath,\n mediaTypePath,\n ab,\n cd,\n `rep-${checksum}${extension}`\n );\n\n this.logger?.debug('Storing representation', {\n checksum,\n mediaType: baseMediaType,\n byteSize: content.length,\n filename: metadata.filename\n });\n\n // Create directory structure programmatically\n await fs.mkdir(path.dirname(filePath), { recursive: true });\n\n // Write content (idempotent - same content = same file)\n await fs.writeFile(filePath, content);\n\n this.logger?.info('Representation stored', {\n checksum,\n mediaType: baseMediaType,\n byteSize: content.length,\n path: filePath\n });\n\n return {\n '@id': checksum, // Use checksum as the ID (content-addressed)\n ...metadata,\n byteSize: content.length,\n checksum,\n created: new Date().toISOString(),\n };\n }\n\n async retrieve(checksum: string, mediaType: string): Promise<Buffer> {\n // Strip charset/parameters for path - only use base MIME type for directory lookup\n // e.g., \"text/plain; charset=iso-8859-1\" -> \"text/plain\"\n const baseMediaType = mediaType.split(';')[0]!.trim();\n const mediaTypePath = this.encodeMediaType(baseMediaType);\n const extension = getExtensionForMimeType(baseMediaType);\n\n if (!checksum || checksum.length < 4) {\n throw new Error(`Invalid checksum: ${checksum}`);\n }\n\n // Use first 4 hex digits for sharding: 5a/aa\n const ab = checksum.substring(0, 2);\n const cd = 
checksum.substring(2, 4);\n\n // Build file path from raw hex checksum with proper extension\n const filePath = path.join(\n this.basePath,\n mediaTypePath,\n ab,\n cd,\n `rep-${checksum}${extension}`\n );\n\n this.logger?.debug('Retrieving representation', {\n checksum,\n mediaType: baseMediaType\n });\n\n try {\n const content = await fs.readFile(filePath);\n this.logger?.info('Representation retrieved', {\n checksum,\n mediaType: baseMediaType,\n byteSize: content.length,\n path: filePath\n });\n return content;\n } catch (error: any) {\n if (error.code === 'ENOENT') {\n this.logger?.warn('Representation not found', {\n checksum,\n mediaType: baseMediaType,\n path: filePath\n });\n throw new Error(`Representation not found for checksum ${checksum} with mediaType ${mediaType}`);\n }\n this.logger?.error('Failed to retrieve representation', {\n checksum,\n mediaType: baseMediaType,\n error: error.message,\n path: filePath\n });\n throw error;\n }\n }\n\n /**\n * Encode media type for filesystem path\n * Replaces \"/\" with \"~1\" to avoid directory separators\n *\n * @param mediaType - MIME type (e.g., \"text/markdown\")\n * @returns Encoded path segment (e.g., \"text~1markdown\")\n */\n private encodeMediaType(mediaType: string): string {\n return mediaType.replace(/\\//g, '~1');\n }\n}\n","/**\n * Checksum utilities for content verification\n */\n\nimport { createHash } from 'crypto';\n\n/**\n * Calculate SHA-256 checksum of content\n * @param content The content to hash\n * @returns Hex-encoded SHA-256 hash\n */\nexport function calculateChecksum(content: string | Buffer): string {\n const hash = createHash('sha256');\n hash.update(content);\n return hash.digest('hex');\n}\n\n/**\n * Verify content against a checksum\n * @param content The content to verify\n * @param checksum The expected checksum\n * @returns True if content matches checksum\n */\nexport function verifyChecksum(content: string | Buffer, checksum: string): boolean {\n return 
calculateChecksum(content) === checksum;\n}\n","/**\n * MIME Type to File Extension Mapping\n *\n * Maps common MIME types to their standard file extensions.\n * Used by RepresentationStore to save files with proper extensions.\n */\n\n/**\n * Comprehensive MIME type to extension mapping\n */\nconst MIME_TO_EXTENSION: Record<string, string> = {\n // Text formats\n 'text/plain': '.txt',\n 'text/markdown': '.md',\n 'text/html': '.html',\n 'text/css': '.css',\n 'text/csv': '.csv',\n 'text/xml': '.xml',\n\n // Application formats - structured data\n 'application/json': '.json',\n 'application/xml': '.xml',\n 'application/yaml': '.yaml',\n 'application/x-yaml': '.yaml',\n\n // Application formats - documents\n 'application/pdf': '.pdf',\n 'application/msword': '.doc',\n 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',\n 'application/vnd.ms-excel': '.xls',\n 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',\n 'application/vnd.ms-powerpoint': '.ppt',\n 'application/vnd.openxmlformats-officedocument.presentationml.presentation': '.pptx',\n\n // Application formats - archives\n 'application/zip': '.zip',\n 'application/gzip': '.gz',\n 'application/x-tar': '.tar',\n 'application/x-7z-compressed': '.7z',\n\n // Application formats - executables/binaries\n 'application/octet-stream': '.bin',\n 'application/wasm': '.wasm',\n\n // Image formats\n 'image/png': '.png',\n 'image/jpeg': '.jpg',\n 'image/gif': '.gif',\n 'image/webp': '.webp',\n 'image/svg+xml': '.svg',\n 'image/bmp': '.bmp',\n 'image/tiff': '.tiff',\n 'image/x-icon': '.ico',\n\n // Audio formats\n 'audio/mpeg': '.mp3',\n 'audio/wav': '.wav',\n 'audio/ogg': '.ogg',\n 'audio/webm': '.webm',\n 'audio/aac': '.aac',\n 'audio/flac': '.flac',\n\n // Video formats\n 'video/mp4': '.mp4',\n 'video/mpeg': '.mpeg',\n 'video/webm': '.webm',\n 'video/ogg': '.ogv',\n 'video/quicktime': '.mov',\n 'video/x-msvideo': '.avi',\n\n // Programming languages\n 
'text/javascript': '.js',\n 'application/javascript': '.js',\n 'text/x-typescript': '.ts',\n 'application/typescript': '.ts',\n 'text/x-python': '.py',\n 'text/x-java': '.java',\n 'text/x-c': '.c',\n 'text/x-c++': '.cpp',\n 'text/x-csharp': '.cs',\n 'text/x-go': '.go',\n 'text/x-rust': '.rs',\n 'text/x-ruby': '.rb',\n 'text/x-php': '.php',\n 'text/x-swift': '.swift',\n 'text/x-kotlin': '.kt',\n 'text/x-shell': '.sh',\n\n // Font formats\n 'font/woff': '.woff',\n 'font/woff2': '.woff2',\n 'font/ttf': '.ttf',\n 'font/otf': '.otf',\n};\n\n/**\n * Get file extension for a MIME type\n *\n * @param mediaType - MIME type (e.g., \"text/markdown\")\n * @returns File extension with leading dot (e.g., \".md\") or \".dat\" if unknown\n *\n * @example\n * getExtensionForMimeType('text/markdown') // => '.md'\n * getExtensionForMimeType('image/png') // => '.png'\n * getExtensionForMimeType('unknown/type') // => '.dat'\n */\nexport function getExtensionForMimeType(mediaType: string): string {\n // Normalize MIME type (lowercase, remove parameters)\n const normalized = mediaType.toLowerCase().split(';')[0]!.trim();\n\n // Look up in mapping\n const extension = MIME_TO_EXTENSION[normalized];\n\n // Return mapped extension or fallback to .dat\n return extension || '.dat';\n}\n\n/**\n * Check if a MIME type has a known extension mapping\n *\n * @param mediaType - MIME type to check\n * @returns true if extension is known, false if would fallback to .dat\n */\nexport function hasKnownExtension(mediaType: string): boolean {\n const normalized = mediaType.toLowerCase().split(';')[0]!.trim();\n return normalized in 
MIME_TO_EXTENSION;\n}\n"],"mappings":";AAkCA,SAAS,YAAY,UAAU;AAC/B,OAAO,UAAU;;;AC/BjB,SAAS,kBAAkB;AAOpB,SAAS,kBAAkB,SAAkC;AAClE,QAAM,OAAO,WAAW,QAAQ;AAChC,OAAK,OAAO,OAAO;AACnB,SAAO,KAAK,OAAO,KAAK;AAC1B;AAQO,SAAS,eAAe,SAA0B,UAA2B;AAClF,SAAO,kBAAkB,OAAO,MAAM;AACxC;;;ACfA,IAAM,oBAA4C;AAAA;AAAA,EAEhD,cAAc;AAAA,EACd,iBAAiB;AAAA,EACjB,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,YAAY;AAAA,EACZ,YAAY;AAAA;AAAA,EAGZ,oBAAoB;AAAA,EACpB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,sBAAsB;AAAA;AAAA,EAGtB,mBAAmB;AAAA,EACnB,sBAAsB;AAAA,EACtB,2EAA2E;AAAA,EAC3E,4BAA4B;AAAA,EAC5B,qEAAqE;AAAA,EACrE,iCAAiC;AAAA,EACjC,6EAA6E;AAAA;AAAA,EAG7E,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,qBAAqB;AAAA,EACrB,+BAA+B;AAAA;AAAA,EAG/B,4BAA4B;AAAA,EAC5B,oBAAoB;AAAA;AAAA,EAGpB,aAAa;AAAA,EACb,cAAc;AAAA,EACd,aAAa;AAAA,EACb,cAAc;AAAA,EACd,iBAAiB;AAAA,EACjB,aAAa;AAAA,EACb,cAAc;AAAA,EACd,gBAAgB;AAAA;AAAA,EAGhB,cAAc;AAAA,EACd,aAAa;AAAA,EACb,aAAa;AAAA,EACb,cAAc;AAAA,EACd,aAAa;AAAA,EACb,cAAc;AAAA;AAAA,EAGd,aAAa;AAAA,EACb,cAAc;AAAA,EACd,cAAc;AAAA,EACd,aAAa;AAAA,EACb,mBAAmB;AAAA,EACnB,mBAAmB;AAAA;AAAA,EAGnB,mBAAmB;AAAA,EACnB,0BAA0B;AAAA,EAC1B,qBAAqB;AAAA,EACrB,0BAA0B;AAAA,EAC1B,iBAAiB;AAAA,EACjB,eAAe;AAAA,EACf,YAAY;AAAA,EACZ,cAAc;AAAA,EACd,iBAAiB;AAAA,EACjB,aAAa;AAAA,EACb,eAAe;AAAA,EACf,eAAe;AAAA,EACf,cAAc;AAAA,EACd,gBAAgB;AAAA,EAChB,iBAAiB;AAAA,EACjB,gBAAgB;AAAA;AAAA,EAGhB,aAAa;AAAA,EACb,cAAc;AAAA,EACd,YAAY;AAAA,EACZ,YAAY;AACd;AAaO,SAAS,wBAAwB,WAA2B;AAEjE,QAAM,aAAa,UAAU,YAAY,EAAE,MAAM,GAAG,EAAE,CAAC,EAAG,KAAK;AAG/D,QAAM,YAAY,kBAAkB,UAAU;AAG9C,SAAO,aAAa;AACtB;AAQO,SAAS,kBAAkB,WAA4B;AAC5D,QAAM,aAAa,UAAU,YAAY,EAAE,MAAM,GAAG,EAAE,CAAC,EAAG,KAAK;AAC/D,SAAO,cAAc;AACvB;;;AFtCO,IAAM,gCAAN,MAAmE;AAAA,EAChE;AAAA,EACA;AAAA,EAER,YAAY,SAAyB,QAAiB;AACpD,SAAK,SAAS;AACd,SAAK,WAAW,QAAQ;AAAA,EAC1B;AAAA,EAEA,MAAM,MAAM,SAAiB,UAAiE;AAE5F,UAAM,WAAW,kBAAkB,OAAO;AAI1C,UAAM,gBAAgB,SAAS,UAAU,MAAM,GAAG,EAAE,CAAC,EAAG,KAAK;AAC7D,UAAM,gBAAgB,KAAK,gBAAgB,aAAa;AACxD,UAAM,YAAY,wBAAwB,aAAa;AAEvD,QAAI,CAAC,YAAY,SAAS,SAAS,GAAG;AACpC,YAAM,IAAI,MAAM,qBAAqB,QAAQ,EAAE;AAAA,IACjD;
AAGA,UAAM,KAAK,SAAS,UAAU,GAAG,CAAC;AAClC,UAAM,KAAK,SAAS,UAAU,GAAG,CAAC;AAGlC,UAAM,WAAW,KAAK;AAAA,MACpB,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA,OAAO,QAAQ,GAAG,SAAS;AAAA,IAC7B;AAEA,SAAK,QAAQ,MAAM,0BAA0B;AAAA,MAC3C;AAAA,MACA,WAAW;AAAA,MACX,UAAU,QAAQ;AAAA,MAClB,UAAU,SAAS;AAAA,IACrB,CAAC;AAGD,UAAM,GAAG,MAAM,KAAK,QAAQ,QAAQ,GAAG,EAAE,WAAW,KAAK,CAAC;AAG1D,UAAM,GAAG,UAAU,UAAU,OAAO;AAEpC,SAAK,QAAQ,KAAK,yBAAyB;AAAA,MACzC;AAAA,MACA,WAAW;AAAA,MACX,UAAU,QAAQ;AAAA,MAClB,MAAM;AAAA,IACR,CAAC;AAED,WAAO;AAAA,MACL,OAAO;AAAA;AAAA,MACP,GAAG;AAAA,MACH,UAAU,QAAQ;AAAA,MAClB;AAAA,MACA,UAAS,oBAAI,KAAK,GAAE,YAAY;AAAA,IAClC;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,UAAkB,WAAoC;AAGnE,UAAM,gBAAgB,UAAU,MAAM,GAAG,EAAE,CAAC,EAAG,KAAK;AACpD,UAAM,gBAAgB,KAAK,gBAAgB,aAAa;AACxD,UAAM,YAAY,wBAAwB,aAAa;AAEvD,QAAI,CAAC,YAAY,SAAS,SAAS,GAAG;AACpC,YAAM,IAAI,MAAM,qBAAqB,QAAQ,EAAE;AAAA,IACjD;AAGA,UAAM,KAAK,SAAS,UAAU,GAAG,CAAC;AAClC,UAAM,KAAK,SAAS,UAAU,GAAG,CAAC;AAGlC,UAAM,WAAW,KAAK;AAAA,MACpB,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA,OAAO,QAAQ,GAAG,SAAS;AAAA,IAC7B;AAEA,SAAK,QAAQ,MAAM,6BAA6B;AAAA,MAC9C;AAAA,MACA,WAAW;AAAA,IACb,CAAC;AAED,QAAI;AACF,YAAM,UAAU,MAAM,GAAG,SAAS,QAAQ;AAC1C,WAAK,QAAQ,KAAK,4BAA4B;AAAA,QAC5C;AAAA,QACA,WAAW;AAAA,QACX,UAAU,QAAQ;AAAA,QAClB,MAAM;AAAA,MACR,CAAC;AACD,aAAO;AAAA,IACT,SAAS,OAAY;AACnB,UAAI,MAAM,SAAS,UAAU;AAC3B,aAAK,QAAQ,KAAK,4BAA4B;AAAA,UAC5C;AAAA,UACA,WAAW;AAAA,UACX,MAAM;AAAA,QACR,CAAC;AACD,cAAM,IAAI,MAAM,yCAAyC,QAAQ,mBAAmB,SAAS,EAAE;AAAA,MACjG;AACA,WAAK,QAAQ,MAAM,qCAAqC;AAAA,QACtD;AAAA,QACA,WAAW;AAAA,QACX,OAAO,MAAM;AAAA,QACb,MAAM;AAAA,MACR,CAAC;AACD,YAAM;AAAA,IACR;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASQ,gBAAgB,WAA2B;AACjD,WAAO,UAAU,QAAQ,OAAO,IAAI;AAAA,EACtC;AACF;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@semiont/content",
|
|
3
|
-
"version": "0.3.5",
|
|
3
|
+
"version": "0.3.7",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Content-addressed storage for resource representations",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -28,7 +28,7 @@
|
|
|
28
28
|
"@vitest/ui": "4.0.18"
|
|
29
29
|
},
|
|
30
30
|
"devDependencies": {
|
|
31
|
-
"@vitest/coverage-v8": "^4.0
|
|
31
|
+
"@vitest/coverage-v8": "^4.1.0",
|
|
32
32
|
"tsup": "^8.0.1",
|
|
33
33
|
"typescript": "^5.6.3",
|
|
34
34
|
"vitest": "^4.0.18"
|