@semiont/content 0.2.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,107 @@
1
+ # @semiont/content
2
+
3
+ [![Tests](https://github.com/The-AI-Alliance/semiont/actions/workflows/package-tests.yml/badge.svg)](https://github.com/The-AI-Alliance/semiont/actions/workflows/package-tests.yml?query=branch%3Amain+is%3Asuccess+job%3A%22Test+content%22)
4
+
5
+ Content-addressed storage for resource representations with automatic deduplication.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ npm install @semiont/content
11
+ ```
12
+
13
+ ## Quick Start
14
+
15
+ ```typescript
16
+ import { FilesystemRepresentationStore } from '@semiont/content';
17
+
18
+ const store = new FilesystemRepresentationStore({
19
+ basePath: '/path/to/storage'
20
+ });
21
+
22
+ // Store content - checksum becomes the address
23
+ const content = Buffer.from('Hello, World!');
24
+ const stored = await store.store(content, {
25
+ mediaType: 'text/plain',
26
+ rel: 'original'
27
+ });
28
+
29
+ // Retrieve by checksum
30
+ const retrieved = await store.retrieve(stored.checksum, 'text/plain');
31
+ ```
32
+
33
+ From [src/representation-store.ts](src/representation-store.ts): Content-addressed storage implementation.
34
+
35
+ ## Key Features
36
+
37
+ - **Content-Addressed**: SHA-256 checksum is the filename
38
+ - **Automatic Deduplication**: Same content = same file
39
+ - **Idempotent**: Storing the same content again rewrites the same file with identical bytes (only the returned `created` timestamp differs)
40
+ - **Sharding**: Distributes files across 65,536 directories for performance
41
+ - **MIME Type Support**: 60+ types with proper file extensions
42
+ - **Character Encoding**: Preserves charset in metadata
43
+
44
+ ## Storage Structure
45
+
46
+ ```
47
+ basePath/representations/{mediaType}/{ab}/{cd}/rep-{checksum}.{ext}
48
+ ```
49
+
50
+ Example: `text~1markdown/5a/aa/rep-5aaa0b72abc123....md`
51
+
52
+ From [src/representation-store.ts](src/representation-store.ts): Checksum-based sharding uses first 4 hex digits.
53
+
54
+ ## API Reference
55
+
56
+ ### FilesystemRepresentationStore
57
+
58
+ ```typescript
59
+ new FilesystemRepresentationStore(
60
+ config: { basePath: string },
61
+ projectRoot?: string
62
+ )
63
+
64
+ store(content: Buffer, metadata: RepresentationMetadata): Promise<StoredRepresentation>
65
+ retrieve(checksum: string, mediaType: string): Promise<Buffer>
66
+ ```
67
+
68
+ ### Types
69
+
70
+ ```typescript
71
+ interface RepresentationMetadata {
72
+ mediaType: string; // REQUIRED
73
+ filename?: string;
74
+ encoding?: string;
75
+ language?: string;
76
+ rel?: 'original' | 'thumbnail' | 'preview' | 'optimized' | 'derived' | 'other';
77
+ }
78
+
79
+ interface StoredRepresentation extends RepresentationMetadata {
80
+ '@id': string; // Content address
81
+ byteSize: number;
82
+ checksum: string; // SHA-256 hex (64 chars)
83
+ created: string; // ISO 8601
84
+ }
85
+ ```
86
+
87
+ From [src/representation-store.ts](src/representation-store.ts): Complete type definitions.
88
+
89
+ ### Utilities
90
+
91
+ ```typescript
92
+ getExtensionForMimeType(mediaType: string): string // Returns extension or '.dat'
93
+ hasKnownExtension(mediaType: string): boolean // Check if type is known
94
+ ```
95
+
96
+ From [src/mime-extensions.ts](src/mime-extensions.ts): 60+ MIME type mappings.
97
+
98
+ ## Documentation
99
+
100
+ - [Content Addressing](docs/content-addressing.md) - How content-addressed storage works
101
+ - [Sharding Strategy](docs/sharding-strategy.md) - Directory distribution details
102
+ - [MIME Types](docs/mime-types.md) - Media type handling
103
+ - [Architecture](docs/architecture.md) - Design principles and implementation
104
+
105
+ ## License
106
+
107
+ Apache-2.0
@@ -0,0 +1,120 @@
1
+ /**
2
+ * RepresentationStore - Content-addressed storage for byte-level resource representations
3
+ *
4
+ * Handles storage and retrieval of concrete byte-level renditions of resources.
5
+ * Uses content-addressed storage where the checksum IS the filename.
6
+ * The RepresentationStore interface allows multiple storage backends (filesystem, S3, IPFS, etc.); this package ships the filesystem backend.
7
+ *
8
+ * Storage structure (filesystem):
9
+ * basePath/representations/{mediaType}/{ab}/{cd}/rep-{checksum}{extension}
10
+ *
11
+ * Where:
12
+ * - {mediaType} is base MIME type with "/" encoded as "~1" (e.g., "text~1markdown")
13
+ * - {ab}/{cd} are first 4 hex digits of checksum for sharding
14
+ * - {checksum} is the raw SHA-256 hex hash (e.g., "5aaa0b72abc123...")
15
+ * - {extension} is derived from base MIME type (.md, .txt, .png, etc.)
16
+ *
17
+ * Example:
18
+ * For content with checksum "5aaa0b72abc123..." and mediaType "text/markdown; charset=iso-8859-1":
19
+ * - Storage path: basePath/representations/text~1markdown/5a/aa/rep-5aaa0b72abc123....md
20
+ * - Stored mediaType: "text/markdown; charset=iso-8859-1" (full type with charset preserved)
21
+ *
22
+ * Character Encoding:
23
+ * - Charset parameters in mediaType are preserved in metadata (e.g., "text/plain; charset=iso-8859-1")
24
+ * - Storage path uses only base MIME type (strips charset for directory structure)
25
+ * - Content stored as raw bytes - charset only affects decoding on retrieval
26
+ *
27
+ * This design provides:
28
+ * - O(1) content retrieval by checksum + mediaType
29
+ * - Automatic deduplication (identical content = same file)
30
+ * - Idempotent storage operations
31
+ * - Proper file extensions for filesystem browsing
32
+ * - Faithful preservation of character encoding metadata
33
+ */
34
+ /**
35
+ * Metadata for a representation being stored
36
+ */
37
+ interface RepresentationMetadata {
38
+ mediaType: string; // REQUIRED - MIME type; parameters such as "; charset=..." are returned as given, only the part before ";" is used for the storage path
39
+ filename?: string; // Optional; returned unchanged by store(), not used to build the storage path
40
+ encoding?: string; // Optional; returned unchanged by store()
41
+ language?: string; // Optional; returned unchanged by store()
42
+ rel?: 'original' | 'thumbnail' | 'preview' | 'optimized' | 'derived' | 'other'; // Optional; returned unchanged by store()
43
+ }
44
+ /**
45
+ * Complete representation information
46
+ */
47
+ interface StoredRepresentation extends RepresentationMetadata {
48
+ '@id': string; // Representation ID; the filesystem store sets this to the checksum
49
+ byteSize: number; // Size of the stored content in bytes
50
+ checksum: string; // Raw SHA-256 hex hash of the content
51
+ created: string; // ISO 8601 timestamp generated when store() returns
52
+ }
53
+ /**
54
+ * Interface for representation storage backends
55
+ */
56
+ interface RepresentationStore {
57
+ /**
58
+ * Store content and return representation metadata
59
+ *
60
+ * @param content - Raw bytes to store
61
+ * @param metadata - Representation metadata (mediaType is required)
62
+ * @returns Complete representation info with checksum
63
+ */
64
+ store(content: Buffer, metadata: RepresentationMetadata): Promise<StoredRepresentation>;
65
+ /**
66
+ * Retrieve content by checksum (content-addressed lookup)
67
+ *
68
+ * @param checksum - Content checksum as raw hex (e.g., "5aaa0b72...")
69
+ * @param mediaType - MIME type the content was stored under (e.g., "text/markdown")
70
+ * @returns Raw bytes
71
+ */
72
+ retrieve(checksum: string, mediaType: string): Promise<Buffer>;
73
+ }
74
+ /**
75
+ * Filesystem implementation of RepresentationStore
76
+ */
77
+ declare class FilesystemRepresentationStore implements RepresentationStore {
78
+ private basePath; // Storage root, resolved once in the constructor
79
+ constructor(config: {
80
+ basePath: string;
81
+ }, projectRoot?: string); // A relative basePath is resolved against projectRoot when given, otherwise against the current working directory
82
+ store(content: Buffer, metadata: RepresentationMetadata): Promise<StoredRepresentation>; // Writes the bytes to a path derived from their checksum and base MIME type
83
+ retrieve(checksum: string, mediaType: string): Promise<Buffer>; // Rejects with "Representation not found ..." when no file exists for that checksum and MIME type
84
+ /**
85
+ * Encode media type for filesystem path
86
+ * Replaces "/" with "~1" to avoid directory separators
87
+ *
88
+ * @param mediaType - MIME type (e.g., "text/markdown")
89
+ * @returns Encoded path segment (e.g., "text~1markdown")
90
+ */
91
+ private encodeMediaType;
92
+ }
93
+
94
+ /**
95
+ * MIME Type to File Extension Mapping
96
+ *
97
+ * Maps common MIME types to their standard file extensions.
98
+ * Used by RepresentationStore to save files with proper extensions.
99
+ */
100
+ /**
101
+ * Get file extension for a MIME type
102
+ *
103
+ * @param mediaType - MIME type (e.g., "text/markdown"); matched case-insensitively, parameters after ";" are ignored
104
+ * @returns File extension with leading dot (e.g., ".md") or ".dat" if unknown
105
+ *
106
+ * @example
107
+ * getExtensionForMimeType('text/markdown') // => '.md'
108
+ * getExtensionForMimeType('image/png') // => '.png'
109
+ * getExtensionForMimeType('unknown/type') // => '.dat'
110
+ */
111
+ declare function getExtensionForMimeType(mediaType: string): string;
112
+ /**
113
+ * Check if a MIME type has a known extension mapping
114
+ *
115
+ * @param mediaType - MIME type to check; matched case-insensitively, parameters after ";" are ignored
116
+ * @returns true if extension is known, false if would fallback to .dat
117
+ */
118
+ declare function hasKnownExtension(mediaType: string): boolean;
119
+
120
+ export { FilesystemRepresentationStore, type RepresentationMetadata, type RepresentationStore, type StoredRepresentation, getExtensionForMimeType, hasKnownExtension };
package/dist/index.js ADDED
@@ -0,0 +1,175 @@
1
+ // src/representation-store.ts
2
+ import { promises as fs } from "fs";
3
+ import path from "path";
4
+ import { calculateChecksum } from "@semiont/core";
5
+
6
+ // src/mime-extensions.ts
7
// Lookup table from base MIME type (lowercase, no parameters) to file extension
// (with leading dot).
//
// The table is built on a null-prototype object so that only the keys listed
// here exist. With an ordinary object literal, `key in table` and `table[key]`
// also see inherited Object.prototype members such as "constructor" or
// "toString", which would be reported as known MIME types.
var MIME_TO_EXTENSION = Object.assign(Object.create(null), {
  // Text formats
  "text/plain": ".txt",
  "text/markdown": ".md",
  "text/html": ".html",
  "text/css": ".css",
  "text/csv": ".csv",
  "text/xml": ".xml",
  // Application formats - structured data
  "application/json": ".json",
  "application/xml": ".xml",
  "application/yaml": ".yaml",
  "application/x-yaml": ".yaml",
  // Application formats - documents
  "application/pdf": ".pdf",
  "application/msword": ".doc",
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
  "application/vnd.ms-excel": ".xls",
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
  "application/vnd.ms-powerpoint": ".ppt",
  "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
  // Application formats - archives
  "application/zip": ".zip",
  "application/gzip": ".gz",
  "application/x-tar": ".tar",
  "application/x-7z-compressed": ".7z",
  // Application formats - executables/binaries
  "application/octet-stream": ".bin",
  "application/wasm": ".wasm",
  // Image formats
  "image/png": ".png",
  "image/jpeg": ".jpg",
  "image/gif": ".gif",
  "image/webp": ".webp",
  "image/svg+xml": ".svg",
  "image/bmp": ".bmp",
  "image/tiff": ".tiff",
  "image/x-icon": ".ico",
  // Audio formats
  "audio/mpeg": ".mp3",
  "audio/wav": ".wav",
  "audio/ogg": ".ogg",
  "audio/webm": ".webm",
  "audio/aac": ".aac",
  "audio/flac": ".flac",
  // Video formats
  "video/mp4": ".mp4",
  "video/mpeg": ".mpeg",
  "video/webm": ".webm",
  "video/ogg": ".ogv",
  "video/quicktime": ".mov",
  "video/x-msvideo": ".avi",
  // Programming languages
  "text/javascript": ".js",
  "application/javascript": ".js",
  "text/x-typescript": ".ts",
  "application/typescript": ".ts",
  "text/x-python": ".py",
  "text/x-java": ".java",
  "text/x-c": ".c",
  "text/x-c++": ".cpp",
  "text/x-csharp": ".cs",
  "text/x-go": ".go",
  "text/x-rust": ".rs",
  "text/x-ruby": ".rb",
  "text/x-php": ".php",
  "text/x-swift": ".swift",
  "text/x-kotlin": ".kt",
  "text/x-shell": ".sh",
  // Font formats
  "font/woff": ".woff",
  "font/woff2": ".woff2",
  "font/ttf": ".ttf",
  "font/otf": ".otf"
});
82
/**
 * Resolve the file extension used when persisting content of a given MIME type.
 *
 * The lookup is case-insensitive and ignores parameters (everything after the
 * first ";"), so "Text/Markdown; charset=utf-8" resolves like "text/markdown".
 *
 * @param mediaType - MIME type, optionally followed by parameters
 * @returns Extension with leading dot (e.g. ".md"), or ".dat" when the type is not in the table
 */
function getExtensionForMimeType(mediaType) {
  const normalized = mediaType.toLowerCase().split(";")[0].trim();
  // Own-property check only: a bare `MIME_TO_EXTENSION[normalized]` lookup also
  // resolves inherited Object.prototype members (e.g. "constructor", "toString")
  // and would return a function or object instead of an extension string.
  if (Object.prototype.hasOwnProperty.call(MIME_TO_EXTENSION, normalized)) {
    return MIME_TO_EXTENSION[normalized] || ".dat";
  }
  return ".dat";
}
87
/**
 * Report whether a MIME type has an explicit entry in the extension table.
 *
 * Uses the same normalization as the extension lookup: lowercase, parameters
 * after the first ";" dropped, surrounding whitespace trimmed.
 *
 * @param mediaType - MIME type, optionally followed by parameters
 * @returns true when the table has an entry for the type, false when the ".dat" fallback would apply
 */
function hasKnownExtension(mediaType) {
  const normalized = mediaType.toLowerCase().split(";")[0].trim();
  // `in` walks the prototype chain, so names like "toString" or "constructor"
  // would be reported as known; check own properties only.
  return Object.prototype.hasOwnProperty.call(MIME_TO_EXTENSION, normalized);
}
91
+
92
+ // src/representation-store.ts
93
/**
 * Filesystem implementation of the representation store.
 *
 * Content is written to
 *   {basePath}/representations/{mediaType}/{ab}/{cd}/rep-{checksum}{extension}
 * where {mediaType} is the base MIME type with "/" encoded as "~1", {ab}/{cd}
 * are the first four characters of the checksum, and {extension} comes from
 * getExtensionForMimeType().
 */
var FilesystemRepresentationStore = class {
  basePath;
  /**
   * @param config - Storage configuration; `basePath` may be absolute or relative
   * @param projectRoot - Optional directory a relative `basePath` is resolved against;
   *   when omitted, a relative `basePath` is resolved against the current working directory
   */
  constructor(config, projectRoot) {
    if (path.isAbsolute(config.basePath)) {
      this.basePath = config.basePath;
    } else if (projectRoot) {
      this.basePath = path.resolve(projectRoot, config.basePath);
    } else {
      this.basePath = path.resolve(config.basePath);
    }
  }
  /**
   * Write content under its checksum-derived path and describe what was stored.
   *
   * @param content - Raw bytes to store
   * @param metadata - Representation metadata; `mediaType` is required and is returned
   *   unchanged (parameters such as charset included)
   * @returns The metadata plus "@id" (the checksum), byteSize, checksum and an ISO 8601 `created` timestamp
   * @throws Error when the computed checksum or the media type cannot be used to build a path
   */
  async store(content, metadata) {
    const checksum = calculateChecksum(content);
    const filePath = this.resolveFilePath(checksum, metadata.mediaType);
    await fs.mkdir(path.dirname(filePath), { recursive: true });
    // Same content always maps to the same path, so rewriting is harmless.
    await fs.writeFile(filePath, content);
    return {
      "@id": checksum,
      // Use checksum as the ID (content-addressed)
      ...metadata,
      byteSize: content.length,
      checksum,
      created: new Date().toISOString()
    };
  }
  /**
   * Read back the bytes stored for a checksum and media type.
   *
   * @param checksum - Content checksum as raw hex (at least 4 hex digits)
   * @param mediaType - MIME type the content was stored under; parameters after ";" are ignored
   * @returns Raw bytes
   * @throws Error "Invalid checksum: ..." when the checksum is not a hex string
   * @throws Error "Representation not found ..." when no file exists at the derived path
   */
  async retrieve(checksum, mediaType) {
    // The checksum is supplied by the caller and becomes part of a filesystem
    // path. Requiring plain hex keeps values such as "../../etc/passwd" from
    // steering the read outside the store.
    if (typeof checksum !== "string" || !/^[0-9a-fA-F]{4,}$/.test(checksum)) {
      throw new Error(`Invalid checksum: ${checksum}`);
    }
    const filePath = this.resolveFilePath(checksum, mediaType);
    try {
      return await fs.readFile(filePath);
    } catch (error) {
      if (error.code === "ENOENT") {
        throw new Error(`Representation not found for checksum ${checksum} with mediaType ${mediaType}`);
      }
      throw error;
    }
  }
  /**
   * Build the on-disk path for a checksum + media type pair (shared by store and retrieve).
   *
   * NOTE: the directory segment keeps the media type's original letter case, while the
   * extension lookup is case-insensitive, so "Text/Plain" and "text/plain" map to
   * different directories.
   *
   * @param checksum - Checksum used for the shard directories and the file name
   * @param mediaType - MIME type, optionally with parameters
   * @returns Path of the representation file
   */
  resolveFilePath(checksum, mediaType) {
    // Only the base type (before any ";") takes part in the path.
    const baseMediaType = mediaType.split(";")[0].trim();
    const mediaTypePath = this.encodeMediaType(baseMediaType);
    const extension = getExtensionForMimeType(baseMediaType);
    if (!checksum || checksum.length < 4) {
      throw new Error(`Invalid checksum: ${checksum}`);
    }
    // "/" is already encoded; "." / ".." or a backslash (a separator on Windows)
    // would still change the directory structure, and no valid MIME type needs them.
    if (mediaTypePath === "." || mediaTypePath === ".." || mediaTypePath.includes("\\")) {
      throw new Error(`Invalid mediaType: ${mediaType}`);
    }
    // First 4 characters of the checksum pick the two shard directories.
    const ab = checksum.substring(0, 2);
    const cd = checksum.substring(2, 4);
    return path.join(
      this.basePath,
      "representations",
      mediaTypePath,
      ab,
      cd,
      `rep-${checksum}${extension}`
    );
  }
  /**
   * Encode media type for filesystem path
   * Replaces "/" with "~1" to avoid directory separators
   *
   * @param mediaType - MIME type (e.g., "text/markdown")
   * @returns Encoded path segment (e.g., "text~1markdown")
   */
  encodeMediaType(mediaType) {
    return mediaType.replace(/\//g, "~1");
  }
};
170
+ export {
171
+ FilesystemRepresentationStore,
172
+ getExtensionForMimeType,
173
+ hasKnownExtension
174
+ };
175
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/representation-store.ts","../src/mime-extensions.ts"],"sourcesContent":["/**\n * RepresentationStore - Content-addressed storage for byte-level resource representations\n *\n * Handles storage and retrieval of concrete byte-level renditions of resources.\n * Uses content-addressed storage where the checksum IS the filename.\n * Supports multiple storage backends (filesystem, S3, IPFS, etc.)\n *\n * Storage structure (filesystem):\n * basePath/representations/{mediaType}/{ab}/{cd}/rep-{checksum}{extension}\n *\n * Where:\n * - {mediaType} is base MIME type with \"/\" encoded as \"~1\" (e.g., \"text~1markdown\")\n * - {ab}/{cd} are first 4 hex digits of checksum for sharding\n * - {checksum} is the raw SHA-256 hex hash (e.g., \"5aaa0b72abc123...\")\n * - {extension} is derived from base MIME type (.md, .txt, .png, etc.)\n *\n * Example:\n * For content with checksum \"5aaa0b72abc123...\" and mediaType \"text/markdown; charset=iso-8859-1\":\n * - Storage path: basePath/representations/text~1markdown/5a/aa/rep-5aaa0b72abc123....md\n * - Stored mediaType: \"text/markdown; charset=iso-8859-1\" (full type with charset preserved)\n *\n * Character Encoding:\n * - Charset parameters in mediaType are preserved in metadata (e.g., \"text/plain; charset=iso-8859-1\")\n * - Storage path uses only base MIME type (strips charset for directory structure)\n * - Content stored as raw bytes - charset only affects decoding on retrieval\n *\n * This design provides:\n * - O(1) content retrieval by checksum + mediaType\n * - Automatic deduplication (identical content = same file)\n * - Idempotent storage operations\n * - Proper file extensions for filesystem browsing\n * - Faithful preservation of character encoding metadata\n */\n\nimport { promises as fs } from 'fs';\nimport path from 'path';\nimport { calculateChecksum } from '@semiont/core';\nimport { getExtensionForMimeType } from './mime-extensions';\n\n/**\n * Metadata for a representation being 
stored\n */\nexport interface RepresentationMetadata {\n mediaType: string; // REQUIRED - MIME type\n filename?: string;\n encoding?: string;\n language?: string;\n rel?: 'original' | 'thumbnail' | 'preview' | 'optimized' | 'derived' | 'other';\n}\n\n/**\n * Complete representation information\n */\nexport interface StoredRepresentation extends RepresentationMetadata {\n '@id': string; // Representation ID (same as checksum)\n byteSize: number; // Size in bytes\n checksum: string; // Raw SHA-256 hex hash\n created: string; // ISO 8601 timestamp\n}\n\n/**\n * Interface for representation storage backends\n */\nexport interface RepresentationStore {\n /**\n * Store content and return representation metadata\n *\n * @param content - Raw bytes to store\n * @param metadata - Representation metadata\n * @returns Complete representation info with checksum\n */\n store(content: Buffer, metadata: RepresentationMetadata): Promise<StoredRepresentation>;\n\n /**\n * Retrieve content by checksum (content-addressed lookup)\n *\n * @param checksum - Content checksum as raw hex (e.g., \"5aaa0b72...\")\n * @param mediaType - MIME type (e.g., \"text/markdown\")\n * @returns Raw bytes\n */\n retrieve(checksum: string, mediaType: string): Promise<Buffer>;\n}\n\n/**\n * Filesystem implementation of RepresentationStore\n */\nexport class FilesystemRepresentationStore implements RepresentationStore {\n private basePath: string;\n\n constructor(\n config: { basePath: string },\n projectRoot?: string\n ) {\n // If path is absolute, use it directly\n if (path.isAbsolute(config.basePath)) {\n this.basePath = config.basePath;\n }\n // If projectRoot provided, resolve relative paths against it\n else if (projectRoot) {\n this.basePath = path.resolve(projectRoot, config.basePath);\n }\n // Otherwise fall back to resolving against cwd (backward compat)\n else {\n this.basePath = path.resolve(config.basePath);\n }\n }\n\n async store(content: Buffer, metadata: RepresentationMetadata): 
Promise<StoredRepresentation> {\n // Compute checksum (raw hex) - this will be used as the content address\n const checksum = calculateChecksum(content);\n\n // Strip charset/parameters for path - only use base MIME type for directory structure\n // e.g., \"text/plain; charset=iso-8859-1\" -> \"text/plain\"\n const baseMediaType = metadata.mediaType.split(';')[0]!.trim();\n const mediaTypePath = this.encodeMediaType(baseMediaType);\n const extension = getExtensionForMimeType(baseMediaType);\n\n if (!checksum || checksum.length < 4) {\n throw new Error(`Invalid checksum: ${checksum}`);\n }\n\n // Use first 4 hex digits for sharding: 5a/aa\n const ab = checksum.substring(0, 2);\n const cd = checksum.substring(2, 4);\n\n // Build file path using raw hex checksum as filename with proper extension\n const filePath = path.join(\n this.basePath,\n 'representations',\n mediaTypePath,\n ab,\n cd,\n `rep-${checksum}${extension}`\n );\n\n // Create directory structure programmatically\n await fs.mkdir(path.dirname(filePath), { recursive: true });\n\n // Write content (idempotent - same content = same file)\n await fs.writeFile(filePath, content);\n\n return {\n '@id': checksum, // Use checksum as the ID (content-addressed)\n ...metadata,\n byteSize: content.length,\n checksum,\n created: new Date().toISOString(),\n };\n }\n\n async retrieve(checksum: string, mediaType: string): Promise<Buffer> {\n // Strip charset/parameters for path - only use base MIME type for directory lookup\n // e.g., \"text/plain; charset=iso-8859-1\" -> \"text/plain\"\n const baseMediaType = mediaType.split(';')[0]!.trim();\n const mediaTypePath = this.encodeMediaType(baseMediaType);\n const extension = getExtensionForMimeType(baseMediaType);\n\n if (!checksum || checksum.length < 4) {\n throw new Error(`Invalid checksum: ${checksum}`);\n }\n\n // Use first 4 hex digits for sharding: 5a/aa\n const ab = checksum.substring(0, 2);\n const cd = checksum.substring(2, 4);\n\n // Build file path from raw hex 
checksum with proper extension\n const filePath = path.join(\n this.basePath,\n 'representations',\n mediaTypePath,\n ab,\n cd,\n `rep-${checksum}${extension}`\n );\n\n try {\n return await fs.readFile(filePath);\n } catch (error: any) {\n if (error.code === 'ENOENT') {\n throw new Error(`Representation not found for checksum ${checksum} with mediaType ${mediaType}`);\n }\n throw error;\n }\n }\n\n /**\n * Encode media type for filesystem path\n * Replaces \"/\" with \"~1\" to avoid directory separators\n *\n * @param mediaType - MIME type (e.g., \"text/markdown\")\n * @returns Encoded path segment (e.g., \"text~1markdown\")\n */\n private encodeMediaType(mediaType: string): string {\n return mediaType.replace(/\\//g, '~1');\n }\n}\n","/**\n * MIME Type to File Extension Mapping\n *\n * Maps common MIME types to their standard file extensions.\n * Used by RepresentationStore to save files with proper extensions.\n */\n\n/**\n * Comprehensive MIME type to extension mapping\n */\nconst MIME_TO_EXTENSION: Record<string, string> = {\n // Text formats\n 'text/plain': '.txt',\n 'text/markdown': '.md',\n 'text/html': '.html',\n 'text/css': '.css',\n 'text/csv': '.csv',\n 'text/xml': '.xml',\n\n // Application formats - structured data\n 'application/json': '.json',\n 'application/xml': '.xml',\n 'application/yaml': '.yaml',\n 'application/x-yaml': '.yaml',\n\n // Application formats - documents\n 'application/pdf': '.pdf',\n 'application/msword': '.doc',\n 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',\n 'application/vnd.ms-excel': '.xls',\n 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',\n 'application/vnd.ms-powerpoint': '.ppt',\n 'application/vnd.openxmlformats-officedocument.presentationml.presentation': '.pptx',\n\n // Application formats - archives\n 'application/zip': '.zip',\n 'application/gzip': '.gz',\n 'application/x-tar': '.tar',\n 'application/x-7z-compressed': '.7z',\n\n // Application 
formats - executables/binaries\n 'application/octet-stream': '.bin',\n 'application/wasm': '.wasm',\n\n // Image formats\n 'image/png': '.png',\n 'image/jpeg': '.jpg',\n 'image/gif': '.gif',\n 'image/webp': '.webp',\n 'image/svg+xml': '.svg',\n 'image/bmp': '.bmp',\n 'image/tiff': '.tiff',\n 'image/x-icon': '.ico',\n\n // Audio formats\n 'audio/mpeg': '.mp3',\n 'audio/wav': '.wav',\n 'audio/ogg': '.ogg',\n 'audio/webm': '.webm',\n 'audio/aac': '.aac',\n 'audio/flac': '.flac',\n\n // Video formats\n 'video/mp4': '.mp4',\n 'video/mpeg': '.mpeg',\n 'video/webm': '.webm',\n 'video/ogg': '.ogv',\n 'video/quicktime': '.mov',\n 'video/x-msvideo': '.avi',\n\n // Programming languages\n 'text/javascript': '.js',\n 'application/javascript': '.js',\n 'text/x-typescript': '.ts',\n 'application/typescript': '.ts',\n 'text/x-python': '.py',\n 'text/x-java': '.java',\n 'text/x-c': '.c',\n 'text/x-c++': '.cpp',\n 'text/x-csharp': '.cs',\n 'text/x-go': '.go',\n 'text/x-rust': '.rs',\n 'text/x-ruby': '.rb',\n 'text/x-php': '.php',\n 'text/x-swift': '.swift',\n 'text/x-kotlin': '.kt',\n 'text/x-shell': '.sh',\n\n // Font formats\n 'font/woff': '.woff',\n 'font/woff2': '.woff2',\n 'font/ttf': '.ttf',\n 'font/otf': '.otf',\n};\n\n/**\n * Get file extension for a MIME type\n *\n * @param mediaType - MIME type (e.g., \"text/markdown\")\n * @returns File extension with leading dot (e.g., \".md\") or \".dat\" if unknown\n *\n * @example\n * getExtensionForMimeType('text/markdown') // => '.md'\n * getExtensionForMimeType('image/png') // => '.png'\n * getExtensionForMimeType('unknown/type') // => '.dat'\n */\nexport function getExtensionForMimeType(mediaType: string): string {\n // Normalize MIME type (lowercase, remove parameters)\n const normalized = mediaType.toLowerCase().split(';')[0]!.trim();\n\n // Look up in mapping\n const extension = MIME_TO_EXTENSION[normalized];\n\n // Return mapped extension or fallback to .dat\n return extension || '.dat';\n}\n\n/**\n * Check if a MIME type has 
a known extension mapping\n *\n * @param mediaType - MIME type to check\n * @returns true if extension is known, false if would fallback to .dat\n */\nexport function hasKnownExtension(mediaType: string): boolean {\n const normalized = mediaType.toLowerCase().split(';')[0]!.trim();\n return normalized in MIME_TO_EXTENSION;\n}\n"],"mappings":";AAkCA,SAAS,YAAY,UAAU;AAC/B,OAAO,UAAU;AACjB,SAAS,yBAAyB;;;AC1BlC,IAAM,oBAA4C;AAAA;AAAA,EAEhD,cAAc;AAAA,EACd,iBAAiB;AAAA,EACjB,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,YAAY;AAAA,EACZ,YAAY;AAAA;AAAA,EAGZ,oBAAoB;AAAA,EACpB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,sBAAsB;AAAA;AAAA,EAGtB,mBAAmB;AAAA,EACnB,sBAAsB;AAAA,EACtB,2EAA2E;AAAA,EAC3E,4BAA4B;AAAA,EAC5B,qEAAqE;AAAA,EACrE,iCAAiC;AAAA,EACjC,6EAA6E;AAAA;AAAA,EAG7E,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,qBAAqB;AAAA,EACrB,+BAA+B;AAAA;AAAA,EAG/B,4BAA4B;AAAA,EAC5B,oBAAoB;AAAA;AAAA,EAGpB,aAAa;AAAA,EACb,cAAc;AAAA,EACd,aAAa;AAAA,EACb,cAAc;AAAA,EACd,iBAAiB;AAAA,EACjB,aAAa;AAAA,EACb,cAAc;AAAA,EACd,gBAAgB;AAAA;AAAA,EAGhB,cAAc;AAAA,EACd,aAAa;AAAA,EACb,aAAa;AAAA,EACb,cAAc;AAAA,EACd,aAAa;AAAA,EACb,cAAc;AAAA;AAAA,EAGd,aAAa;AAAA,EACb,cAAc;AAAA,EACd,cAAc;AAAA,EACd,aAAa;AAAA,EACb,mBAAmB;AAAA,EACnB,mBAAmB;AAAA;AAAA,EAGnB,mBAAmB;AAAA,EACnB,0BAA0B;AAAA,EAC1B,qBAAqB;AAAA,EACrB,0BAA0B;AAAA,EAC1B,iBAAiB;AAAA,EACjB,eAAe;AAAA,EACf,YAAY;AAAA,EACZ,cAAc;AAAA,EACd,iBAAiB;AAAA,EACjB,aAAa;AAAA,EACb,eAAe;AAAA,EACf,eAAe;AAAA,EACf,cAAc;AAAA,EACd,gBAAgB;AAAA,EAChB,iBAAiB;AAAA,EACjB,gBAAgB;AAAA;AAAA,EAGhB,aAAa;AAAA,EACb,cAAc;AAAA,EACd,YAAY;AAAA,EACZ,YAAY;AACd;AAaO,SAAS,wBAAwB,WAA2B;AAEjE,QAAM,aAAa,UAAU,YAAY,EAAE,MAAM,GAAG,EAAE,CAAC,EAAG,KAAK;AAG/D,QAAM,YAAY,kBAAkB,UAAU;AAG9C,SAAO,aAAa;AACtB;AAQO,SAAS,kBAAkB,WAA4B;AAC5D,QAAM,aAAa,UAAU,YAAY,EAAE,MAAM,GAAG,EAAE,CAAC,EAAG,KAAK;AAC/D,SAAO,cAAc;AACvB;;;ADxCO,IAAM,gCAAN,MAAmE;AAAA,EAChE;AAAA,EAER,YACE,QACA,aACA;AAEA,QAAI,KAAK,WAAW,OAAO,QAAQ,GAAG;AACpC,WAAK,WAAW,OAAO;AAAA,IACzB,WAES,aAAa;AACpB,WAAK,WAAW,KAAK,QAAQ,aAAa,OAAO,QAAQ;AAAA,IAC3D,OAEK;AACH,WAAK,WAAW,KAAK,QAAQ,OAAO,QAAQ;AAAA,IAC9C;AAAA
,EACF;AAAA,EAEA,MAAM,MAAM,SAAiB,UAAiE;AAE5F,UAAM,WAAW,kBAAkB,OAAO;AAI1C,UAAM,gBAAgB,SAAS,UAAU,MAAM,GAAG,EAAE,CAAC,EAAG,KAAK;AAC7D,UAAM,gBAAgB,KAAK,gBAAgB,aAAa;AACxD,UAAM,YAAY,wBAAwB,aAAa;AAEvD,QAAI,CAAC,YAAY,SAAS,SAAS,GAAG;AACpC,YAAM,IAAI,MAAM,qBAAqB,QAAQ,EAAE;AAAA,IACjD;AAGA,UAAM,KAAK,SAAS,UAAU,GAAG,CAAC;AAClC,UAAM,KAAK,SAAS,UAAU,GAAG,CAAC;AAGlC,UAAM,WAAW,KAAK;AAAA,MACpB,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,OAAO,QAAQ,GAAG,SAAS;AAAA,IAC7B;AAGA,UAAM,GAAG,MAAM,KAAK,QAAQ,QAAQ,GAAG,EAAE,WAAW,KAAK,CAAC;AAG1D,UAAM,GAAG,UAAU,UAAU,OAAO;AAEpC,WAAO;AAAA,MACL,OAAO;AAAA;AAAA,MACP,GAAG;AAAA,MACH,UAAU,QAAQ;AAAA,MAClB;AAAA,MACA,UAAS,oBAAI,KAAK,GAAE,YAAY;AAAA,IAClC;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,UAAkB,WAAoC;AAGnE,UAAM,gBAAgB,UAAU,MAAM,GAAG,EAAE,CAAC,EAAG,KAAK;AACpD,UAAM,gBAAgB,KAAK,gBAAgB,aAAa;AACxD,UAAM,YAAY,wBAAwB,aAAa;AAEvD,QAAI,CAAC,YAAY,SAAS,SAAS,GAAG;AACpC,YAAM,IAAI,MAAM,qBAAqB,QAAQ,EAAE;AAAA,IACjD;AAGA,UAAM,KAAK,SAAS,UAAU,GAAG,CAAC;AAClC,UAAM,KAAK,SAAS,UAAU,GAAG,CAAC;AAGlC,UAAM,WAAW,KAAK;AAAA,MACpB,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,OAAO,QAAQ,GAAG,SAAS;AAAA,IAC7B;AAEA,QAAI;AACF,aAAO,MAAM,GAAG,SAAS,QAAQ;AAAA,IACnC,SAAS,OAAY;AACnB,UAAI,MAAM,SAAS,UAAU;AAC3B,cAAM,IAAI,MAAM,yCAAyC,QAAQ,mBAAmB,SAAS,EAAE;AAAA,MACjG;AACA,YAAM;AAAA,IACR;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASQ,gBAAgB,WAA2B;AACjD,WAAO,UAAU,QAAQ,OAAO,IAAI;AAAA,EACtC;AACF;","names":[]}
package/package.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "name": "@semiont/content",
3
+ "version": "0.2.28",
4
+ "type": "module",
5
+ "description": "Content-addressed storage for resource representations",
6
+ "main": "./dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "import": "./dist/index.js"
12
+ }
13
+ },
14
+ "files": [
15
+ "dist",
16
+ "README.md"
17
+ ],
18
+ "scripts": {
19
+ "build": "npm run typecheck && tsup",
20
+ "typecheck": "tsc --noEmit",
21
+ "clean": "rm -rf dist",
22
+ "test": "vitest run",
23
+ "test:watch": "vitest"
24
+ },
25
+ "dependencies": {
26
+ "@semiont/core": "*"
27
+ },
28
+ "devDependencies": {
29
+ "tsup": "^8.0.1",
30
+ "typescript": "^5.6.3"
31
+ },
32
+ "keywords": [
33
+ "content",
34
+ "storage",
35
+ "representation",
36
+ "content-addressed",
37
+ "deduplication",
38
+ "semiont"
39
+ ],
40
+ "author": "The AI Alliance",
41
+ "license": "Apache-2.0",
42
+ "repository": {
43
+ "type": "git",
44
+ "url": "https://github.com/The-AI-Alliance/semiont.git",
45
+ "directory": "packages/content"
46
+ }
47
+ }