@semiont/content 0.2.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +107 -0
- package/dist/index.d.ts +120 -0
- package/dist/index.js +175 -0
- package/dist/index.js.map +1 -0
- package/package.json +47 -0
package/README.md
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# @semiont/content
|
|
2
|
+
|
|
3
|
+
[Package tests: content](https://github.com/The-AI-Alliance/semiont/actions/workflows/package-tests.yml?query=branch%3Amain+is%3Asuccess+job%3A%22Test+content%22)
|
|
4
|
+
|
|
5
|
+
Content-addressed storage for resource representations with automatic deduplication.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install @semiont/content
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
```typescript
|
|
16
|
+
import { FilesystemRepresentationStore } from '@semiont/content';
|
|
17
|
+
|
|
18
|
+
const store = new FilesystemRepresentationStore({
|
|
19
|
+
basePath: '/path/to/storage'
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
// Store content - checksum becomes the address
|
|
23
|
+
const content = Buffer.from('Hello, World!');
|
|
24
|
+
const stored = await store.store(content, {
|
|
25
|
+
mediaType: 'text/plain',
|
|
26
|
+
rel: 'original'
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
// Retrieve by checksum
|
|
30
|
+
const retrieved = await store.retrieve(stored.checksum, 'text/plain');
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
From [src/representation-store.ts](src/representation-store.ts): Content-addressed storage implementation.
|
|
34
|
+
|
|
35
|
+
## Key Features
|
|
36
|
+
|
|
37
|
+
- **Content-Addressed**: SHA-256 checksum is the filename
|
|
38
|
+
- **Automatic Deduplication**: Same content = same file
|
|
39
|
+
- **Idempotent**: Storing the same content again rewrites the identical file at the same address; no duplicate is created
|
|
40
|
+
- **Sharding**: Distributes files across up to 65,536 (256 × 256) shard directories per media type for performance
|
|
41
|
+
- **MIME Type Support**: 60+ types with proper file extensions (unknown types fall back to `.dat`)
|
|
42
|
+
- **Character Encoding**: Preserves charset in metadata
|
|
43
|
+
|
|
44
|
+
## Storage Structure
|
|
45
|
+
|
|
46
|
+
```
|
|
47
|
+
basePath/representations/{mediaType}/{ab}/{cd}/rep-{checksum}.{ext}
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Example: `text~1markdown/5a/aa/rep-5aaa0b72abc123....md`
|
|
51
|
+
|
|
52
|
+
From [src/representation-store.ts](src/representation-store.ts): Checksum-based sharding uses first 4 hex digits.
|
|
53
|
+
|
|
54
|
+
## API Reference
|
|
55
|
+
|
|
56
|
+
### FilesystemRepresentationStore
|
|
57
|
+
|
|
58
|
+
```typescript
|
|
59
|
+
new FilesystemRepresentationStore(
|
|
60
|
+
config: { basePath: string },
|
|
61
|
+
projectRoot?: string
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
store(content: Buffer, metadata: RepresentationMetadata): Promise<StoredRepresentation>
|
|
65
|
+
retrieve(checksum: string, mediaType: string): Promise<Buffer>
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Types
|
|
69
|
+
|
|
70
|
+
```typescript
|
|
71
|
+
interface RepresentationMetadata {
|
|
72
|
+
mediaType: string; // REQUIRED
|
|
73
|
+
filename?: string;
|
|
74
|
+
encoding?: string;
|
|
75
|
+
language?: string;
|
|
76
|
+
rel?: 'original' | 'thumbnail' | 'preview' | 'optimized' | 'derived' | 'other';
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
interface StoredRepresentation extends RepresentationMetadata {
|
|
80
|
+
'@id': string; // Content address
|
|
81
|
+
byteSize: number;
|
|
82
|
+
checksum: string; // SHA-256 hex (64 chars)
|
|
83
|
+
created: string; // ISO 8601
|
|
84
|
+
}
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
From [src/representation-store.ts](src/representation-store.ts): Complete type definitions.
|
|
88
|
+
|
|
89
|
+
### Utilities
|
|
90
|
+
|
|
91
|
+
```typescript
|
|
92
|
+
getExtensionForMimeType(mediaType: string): string // Returns extension or '.dat'
|
|
93
|
+
hasKnownExtension(mediaType: string): boolean // Check if type is known
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
From [src/mime-extensions.ts](src/mime-extensions.ts): 60+ MIME type mappings.
|
|
97
|
+
|
|
98
|
+
## Documentation
|
|
99
|
+
|
|
100
|
+
- [Content Addressing](docs/content-addressing.md) - How content-addressed storage works
|
|
101
|
+
- [Sharding Strategy](docs/sharding-strategy.md) - Directory distribution details
|
|
102
|
+
- [MIME Types](docs/mime-types.md) - Media type handling
|
|
103
|
+
- [Architecture](docs/architecture.md) - Design principles and implementation
|
|
104
|
+
|
|
105
|
+
## License
|
|
106
|
+
|
|
107
|
+
Apache-2.0
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RepresentationStore - Content-addressed storage for byte-level resource representations
|
|
3
|
+
*
|
|
4
|
+
* Handles storage and retrieval of concrete byte-level renditions of resources.
|
|
5
|
+
* Uses content-addressed storage where the checksum IS the filename.
|
|
6
|
+
* Designed to support multiple storage backends (filesystem, S3, IPFS, etc.); this package ships the filesystem backend only.
|
|
7
|
+
*
|
|
8
|
+
* Storage structure (filesystem):
|
|
9
|
+
* basePath/representations/{mediaType}/{ab}/{cd}/rep-{checksum}{extension}
|
|
10
|
+
*
|
|
11
|
+
* Where:
|
|
12
|
+
* - {mediaType} is base MIME type with "/" encoded as "~1" (e.g., "text~1markdown")
|
|
13
|
+
* - {ab}/{cd} are first 4 hex digits of checksum for sharding
|
|
14
|
+
* - {checksum} is the raw SHA-256 hex hash (e.g., "5aaa0b72abc123...")
|
|
15
|
+
* - {extension} is derived from base MIME type (.md, .txt, .png, etc.)
|
|
16
|
+
*
|
|
17
|
+
* Example:
|
|
18
|
+
* For content with checksum "5aaa0b72abc123..." and mediaType "text/markdown; charset=iso-8859-1":
|
|
19
|
+
* - Storage path: basePath/representations/text~1markdown/5a/aa/rep-5aaa0b72abc123....md
|
|
20
|
+
* - Stored mediaType: "text/markdown; charset=iso-8859-1" (full type with charset preserved)
|
|
21
|
+
*
|
|
22
|
+
* Character Encoding:
|
|
23
|
+
* - Charset parameters in mediaType are preserved in metadata (e.g., "text/plain; charset=iso-8859-1")
|
|
24
|
+
* - Storage path uses only base MIME type (strips charset for directory structure)
|
|
25
|
+
* - Content stored as raw bytes - charset only affects decoding on retrieval
|
|
26
|
+
*
|
|
27
|
+
* This design provides:
|
|
28
|
+
* - O(1) content retrieval by checksum + mediaType
|
|
29
|
+
* - Automatic deduplication (identical content = same file)
|
|
30
|
+
* - Idempotent storage operations
|
|
31
|
+
* - Proper file extensions for filesystem browsing
|
|
32
|
+
* - Faithful preservation of character encoding metadata
|
|
33
|
+
*/
|
|
34
|
+
/**
|
|
35
|
+
* Metadata for a representation being stored
|
|
36
|
+
*/
|
|
37
|
+
interface RepresentationMetadata { // Caller-supplied description of the bytes handed to store()
|
|
38
|
+
mediaType: string; // REQUIRED. Full MIME type; parameters such as "; charset=..." are returned unchanged but stripped when the filesystem store builds the storage path
|
|
39
|
+
filename?: string; // Optional; the filesystem store does not read it and returns it unchanged
|
|
40
|
+
encoding?: string; // Optional; the filesystem store does not read it and returns it unchanged
|
|
41
|
+
language?: string; // Optional; the filesystem store does not read it and returns it unchanged
|
|
42
|
+
rel?: 'original' | 'thumbnail' | 'preview' | 'optimized' | 'derived' | 'other'; // Optional; presumably the role of this rendition relative to the resource - TODO confirm semantics with callers
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Complete representation information
|
|
46
|
+
*/
|
|
47
|
+
interface StoredRepresentation extends RepresentationMetadata { // Record returned by store(): the caller's metadata plus the fields below
|
|
48
|
+
'@id': string; // Representation ID; the filesystem store sets it to the checksum
|
|
49
|
+
byteSize: number; // Length in bytes of the content that was passed to store()
|
|
50
|
+
checksum: string; // Content address; documented as the raw SHA-256 hex hash, computed by calculateChecksum from @semiont/core
|
|
51
|
+
created: string; // ISO 8601 timestamp generated on every store() call (not the time the file first appeared on disk)
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Interface for representation storage backends
|
|
55
|
+
*/
|
|
56
|
+
interface RepresentationStore {
|
|
57
|
+
/**
|
|
58
|
+
* Store content and return representation metadata
|
|
59
|
+
*
|
|
60
|
+
* @param content - Raw bytes to store
|
|
61
|
+
* @param metadata - Representation metadata (mediaType is required; the other fields are optional)
|
|
62
|
+
* @returns Complete representation info: the supplied metadata plus '@id', byteSize, checksum and created
|
|
63
|
+
*/
|
|
64
|
+
store(content: Buffer, metadata: RepresentationMetadata): Promise<StoredRepresentation>;
|
|
65
|
+
/**
|
|
66
|
+
* Retrieve content by checksum (content-addressed lookup)
|
|
67
|
+
*
|
|
68
|
+
* @param checksum - Content checksum as raw hex (e.g., "5aaa0b72..."), as returned by store()
|
|
69
|
+
* @param mediaType - MIME type (e.g., "text/markdown"); the filesystem implementation ignores parameters such as charset when locating the file
|
|
70
|
+
* @returns Raw bytes (the filesystem implementation rejects with an Error when nothing is stored for that checksum and media type)
|
|
71
|
+
*/
|
|
72
|
+
retrieve(checksum: string, mediaType: string): Promise<Buffer>;
|
|
73
|
+
}
|
|
74
|
+
/**
|
|
75
|
+
* Filesystem implementation of RepresentationStore
|
|
76
|
+
*/
|
|
77
|
+
declare class FilesystemRepresentationStore implements RepresentationStore {
|
|
78
|
+
private basePath; // Absolute root directory; files are written under <basePath>/representations/
|
|
79
|
+
constructor(config: {
|
|
80
|
+
basePath: string; // Used as-is when absolute; otherwise resolved against projectRoot, or against the current working directory when projectRoot is omitted
|
|
81
|
+
}, projectRoot?: string);
|
|
82
|
+
store(content: Buffer, metadata: RepresentationMetadata): Promise<StoredRepresentation>; // Creates the shard directories as needed and (re)writes the file addressed by the content checksum
|
|
83
|
+
retrieve(checksum: string, mediaType: string): Promise<Buffer>; // Rejects with "Representation not found ..." when the addressed file does not exist; other I/O errors are rethrown
|
|
84
|
+
/**
|
|
85
|
+
* Encode media type for filesystem path
|
|
86
|
+
* Replaces "/" with "~1" to avoid directory separators
|
|
87
|
+
*
|
|
88
|
+
* @param mediaType - MIME type (e.g., "text/markdown")
|
|
89
|
+
* @returns Encoded path segment (e.g., "text~1markdown")
|
|
90
|
+
*/
|
|
91
|
+
private encodeMediaType;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/**
|
|
95
|
+
* MIME Type to File Extension Mapping
|
|
96
|
+
*
|
|
97
|
+
* Maps common MIME types to their standard file extensions.
|
|
98
|
+
* Used by RepresentationStore to save files with proper extensions.
|
|
99
|
+
*/
|
|
100
|
+
/**
|
|
101
|
+
* Get file extension for a MIME type
|
|
102
|
+
*
|
|
103
|
+
* @param mediaType - MIME type (e.g., "text/markdown")
|
|
104
|
+
* @returns File extension with leading dot (e.g., ".md") or ".dat" if unknown
|
|
105
|
+
*
|
|
106
|
+
* @example
|
|
107
|
+
* getExtensionForMimeType('text/markdown') // => '.md'
|
|
108
|
+
* getExtensionForMimeType('image/png') // => '.png'
|
|
109
|
+
* getExtensionForMimeType('unknown/type') // => '.dat'
|
|
110
|
+
*/
|
|
111
|
+
declare function getExtensionForMimeType(mediaType: string): string; // Lookup is case-insensitive and ignores anything after ";" (e.g. a charset parameter)
|
|
112
|
+
/**
|
|
113
|
+
* Check if a MIME type has a known extension mapping
|
|
114
|
+
*
|
|
115
|
+
* @param mediaType - MIME type to check
|
|
116
|
+
* @returns true if extension is known, false if would fallback to .dat
|
|
117
|
+
*/
|
|
118
|
+
declare function hasKnownExtension(mediaType: string): boolean; // Uses the same normalization as getExtensionForMimeType: lower-cased, parameters after ";" dropped
|
|
119
|
+
|
|
120
|
+
export { FilesystemRepresentationStore, type RepresentationMetadata, type RepresentationStore, type StoredRepresentation, getExtensionForMimeType, hasKnownExtension };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
// src/representation-store.ts
|
|
2
|
+
import { promises as fs } from "fs";
|
|
3
|
+
import path from "path";
|
|
4
|
+
import { calculateChecksum } from "@semiont/core";
|
|
5
|
+
|
|
6
|
+
// src/mime-extensions.ts
|
|
7
|
+
/**
 * MIME type -> file extension table.
 *
 * Keys are lower-case base media types (no parameters); values include the
 * leading dot. Types that are not listed fall back to ".dat" in
 * getExtensionForMimeType().
 */
var MIME_TO_EXTENSION = {
  // Text formats
  "text/plain": ".txt",
  "text/markdown": ".md",
  "text/html": ".html",
  "text/css": ".css",
  "text/csv": ".csv",
  "text/xml": ".xml",
  // Application formats - structured data
  "application/json": ".json",
  "application/xml": ".xml",
  "application/yaml": ".yaml",
  "application/x-yaml": ".yaml",
  // Application formats - documents
  "application/pdf": ".pdf",
  "application/msword": ".doc",
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
  "application/vnd.ms-excel": ".xls",
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
  "application/vnd.ms-powerpoint": ".ppt",
  "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
  // Application formats - archives
  "application/zip": ".zip",
  "application/gzip": ".gz",
  "application/x-tar": ".tar",
  "application/x-7z-compressed": ".7z",
  // Application formats - executables/binaries
  "application/octet-stream": ".bin",
  "application/wasm": ".wasm",
  // Image formats
  "image/png": ".png",
  "image/jpeg": ".jpg",
  "image/gif": ".gif",
  "image/webp": ".webp",
  "image/svg+xml": ".svg",
  "image/bmp": ".bmp",
  "image/tiff": ".tiff",
  "image/x-icon": ".ico",
  // Audio formats
  "audio/mpeg": ".mp3",
  "audio/wav": ".wav",
  "audio/ogg": ".ogg",
  "audio/webm": ".webm",
  "audio/aac": ".aac",
  "audio/flac": ".flac",
  // Video formats
  "video/mp4": ".mp4",
  "video/mpeg": ".mpeg",
  "video/webm": ".webm",
  "video/ogg": ".ogv",
  "video/quicktime": ".mov",
  "video/x-msvideo": ".avi",
  // Programming languages
  "text/javascript": ".js",
  "application/javascript": ".js",
  "text/x-typescript": ".ts",
  "application/typescript": ".ts",
  "text/x-python": ".py",
  "text/x-java": ".java",
  "text/x-c": ".c",
  "text/x-c++": ".cpp",
  "text/x-csharp": ".cs",
  "text/x-go": ".go",
  "text/x-rust": ".rs",
  "text/x-ruby": ".rb",
  "text/x-php": ".php",
  "text/x-swift": ".swift",
  "text/x-kotlin": ".kt",
  "text/x-shell": ".sh",
  // Font formats
  "font/woff": ".woff",
  "font/woff2": ".woff2",
  "font/ttf": ".ttf",
  "font/otf": ".otf"
};

/**
 * Look up the mapped extension for a media type.
 *
 * The media type is lower-cased and everything from the first ";" on
 * (e.g. "; charset=utf-8") is dropped before the lookup.
 *
 * Only the table's OWN keys are consulted. A plain property read or the `in`
 * operator would also match names inherited from Object.prototype, so inputs
 * such as "constructor" or "__proto__" would be reported as known types and
 * yield a non-string "extension".
 *
 * @param mediaType - MIME type, optionally with parameters
 * @returns The mapped extension (with leading dot), or undefined when unmapped
 */
function lookupMimeExtension(mediaType) {
  const normalized = mediaType.toLowerCase().split(";")[0].trim();
  return Object.prototype.hasOwnProperty.call(MIME_TO_EXTENSION, normalized)
    ? MIME_TO_EXTENSION[normalized]
    : undefined;
}

/**
 * Get file extension for a MIME type.
 *
 * @param mediaType - MIME type (e.g., "text/markdown"); case and parameters are ignored
 * @returns File extension with leading dot (e.g., ".md"), or ".dat" if the type is not mapped
 *
 * @example
 * getExtensionForMimeType('text/markdown') // => '.md'
 * getExtensionForMimeType('image/png')     // => '.png'
 * getExtensionForMimeType('unknown/type')  // => '.dat'
 */
function getExtensionForMimeType(mediaType) {
  return lookupMimeExtension(mediaType) || ".dat";
}

/**
 * Check if a MIME type has a known extension mapping.
 *
 * @param mediaType - MIME type to check; case and parameters are ignored
 * @returns true if an extension is mapped, false if getExtensionForMimeType would fall back to ".dat"
 */
function hasKnownExtension(mediaType) {
  return lookupMimeExtension(mediaType) !== undefined;
}
|
|
91
|
+
|
|
92
|
+
// src/representation-store.ts
|
|
93
|
+
/**
 * Filesystem implementation of RepresentationStore.
 *
 * Files are laid out as
 *   <basePath>/representations/<encoded media type>/<ab>/<cd>/rep-<checksum><extension>
 * where <ab> and <cd> are the first two and next two characters of the
 * checksum, and <extension> comes from getExtensionForMimeType().
 */
var FilesystemRepresentationStore = class {
  /** Absolute root directory of the store. */
  basePath;

  /**
   * @param config - `config.basePath` is used as-is when it is absolute.
   * @param projectRoot - Optional directory a relative `basePath` is resolved
   *   against. When omitted, a relative `basePath` is resolved against the
   *   current working directory.
   */
  constructor(config, projectRoot) {
    if (path.isAbsolute(config.basePath)) {
      this.basePath = config.basePath;
    } else if (projectRoot) {
      this.basePath = path.resolve(projectRoot, config.basePath);
    } else {
      this.basePath = path.resolve(config.basePath);
    }
  }

  /**
   * Store content under its checksum and return the representation record.
   *
   * @param content - Raw bytes to store
   * @param metadata - Representation metadata; `mediaType` is required
   * @returns The supplied metadata plus "@id", byteSize, checksum and created
   * @throws Error if the checksum is invalid or the computed path would fall
   *   outside the store; filesystem errors are propagated unchanged
   */
  async store(content, metadata) {
    const checksum = calculateChecksum(content);
    const filePath = this.resolveFilePath(checksum, metadata.mediaType);
    await fs.mkdir(path.dirname(filePath), { recursive: true });
    // Identical bytes always resolve to the same path, so rewriting an
    // existing file leaves its contents unchanged.
    await fs.writeFile(filePath, content);
    return {
      "@id": checksum,
      // Use checksum as the ID (content-addressed)
      ...metadata,
      byteSize: content.length,
      checksum,
      created: (/* @__PURE__ */ new Date()).toISOString()
    };
  }

  /**
   * Read back the bytes stored for a checksum and media type.
   *
   * @param checksum - Content checksum as returned by store()
   * @param mediaType - MIME type; parameters such as charset are ignored
   * @returns Raw bytes
   * @throws Error "Representation not found ..." when no such file exists;
   *   other filesystem errors are rethrown unchanged
   */
  async retrieve(checksum, mediaType) {
    const filePath = this.resolveFilePath(checksum, mediaType);
    try {
      return await fs.readFile(filePath);
    } catch (error) {
      if (error.code === "ENOENT") {
        throw new Error(`Representation not found for checksum ${checksum} with mediaType ${mediaType}`);
      }
      throw error;
    }
  }

  /**
   * Build the on-disk path for a checksum and media type. Shared by store()
   * and retrieve() so both always address the same file.
   *
   * NOTE(review): the media type directory keeps the caller's letter case
   * while the extension lookup is case-insensitive, so "Text/Plain" and
   * "text/plain" map to different directories on case-sensitive filesystems.
   * Left unchanged to stay compatible with data already on disk.
   *
   * @param checksum - Content checksum; must be at least 4 characters long
   * @param mediaType - MIME type, optionally with parameters
   * @returns Path of the representation file
   * @throws Error if the checksum is too short, or if the resulting path
   *   would resolve outside <basePath>/representations
   */
  resolveFilePath(checksum, mediaType) {
    // Only the base type is used for the path: "text/plain; charset=x" -> "text/plain"
    const baseMediaType = mediaType.split(";")[0].trim();
    const mediaTypePath = this.encodeMediaType(baseMediaType);
    const extension = getExtensionForMimeType(baseMediaType);
    if (!checksum || checksum.length < 4) {
      throw new Error(`Invalid checksum: ${checksum}`);
    }
    const filePath = path.join(
      this.basePath,
      "representations",
      mediaTypePath,
      checksum.substring(0, 2),
      checksum.substring(2, 4),
      `rep-${checksum}${extension}`
    );
    // path.join() normalizes ".." segments, so a media type of ".." or a
    // checksum containing "../" could otherwise resolve to a file outside
    // the store. Refuse any path that is not strictly inside the root.
    const root = path.join(this.basePath, "representations");
    const relative = path.relative(root, filePath);
    const escapesRoot = relative === "" || relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
    if (escapesRoot) {
      throw new Error(`Invalid storage path for checksum ${checksum} with mediaType ${mediaType}`);
    }
    return filePath;
  }

  /**
   * Encode media type for filesystem path
   * Replaces "/" with "~1" to avoid directory separators
   *
   * @param mediaType - MIME type (e.g., "text/markdown")
   * @returns Encoded path segment (e.g., "text~1markdown")
   */
  encodeMediaType(mediaType) {
    return mediaType.replace(/\//g, "~1");
  }
};
|
|
170
|
+
export {
|
|
171
|
+
FilesystemRepresentationStore,
|
|
172
|
+
getExtensionForMimeType,
|
|
173
|
+
hasKnownExtension
|
|
174
|
+
};
|
|
175
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/representation-store.ts","../src/mime-extensions.ts"],"sourcesContent":["/**\n * RepresentationStore - Content-addressed storage for byte-level resource representations\n *\n * Handles storage and retrieval of concrete byte-level renditions of resources.\n * Uses content-addressed storage where the checksum IS the filename.\n * Supports multiple storage backends (filesystem, S3, IPFS, etc.)\n *\n * Storage structure (filesystem):\n * basePath/representations/{mediaType}/{ab}/{cd}/rep-{checksum}{extension}\n *\n * Where:\n * - {mediaType} is base MIME type with \"/\" encoded as \"~1\" (e.g., \"text~1markdown\")\n * - {ab}/{cd} are first 4 hex digits of checksum for sharding\n * - {checksum} is the raw SHA-256 hex hash (e.g., \"5aaa0b72abc123...\")\n * - {extension} is derived from base MIME type (.md, .txt, .png, etc.)\n *\n * Example:\n * For content with checksum \"5aaa0b72abc123...\" and mediaType \"text/markdown; charset=iso-8859-1\":\n * - Storage path: basePath/representations/text~1markdown/5a/aa/rep-5aaa0b72abc123....md\n * - Stored mediaType: \"text/markdown; charset=iso-8859-1\" (full type with charset preserved)\n *\n * Character Encoding:\n * - Charset parameters in mediaType are preserved in metadata (e.g., \"text/plain; charset=iso-8859-1\")\n * - Storage path uses only base MIME type (strips charset for directory structure)\n * - Content stored as raw bytes - charset only affects decoding on retrieval\n *\n * This design provides:\n * - O(1) content retrieval by checksum + mediaType\n * - Automatic deduplication (identical content = same file)\n * - Idempotent storage operations\n * - Proper file extensions for filesystem browsing\n * - Faithful preservation of character encoding metadata\n */\n\nimport { promises as fs } from 'fs';\nimport path from 'path';\nimport { calculateChecksum } from '@semiont/core';\nimport { getExtensionForMimeType } from './mime-extensions';\n\n/**\n * Metadata for a representation being 
stored\n */\nexport interface RepresentationMetadata {\n mediaType: string; // REQUIRED - MIME type\n filename?: string;\n encoding?: string;\n language?: string;\n rel?: 'original' | 'thumbnail' | 'preview' | 'optimized' | 'derived' | 'other';\n}\n\n/**\n * Complete representation information\n */\nexport interface StoredRepresentation extends RepresentationMetadata {\n '@id': string; // Representation ID (same as checksum)\n byteSize: number; // Size in bytes\n checksum: string; // Raw SHA-256 hex hash\n created: string; // ISO 8601 timestamp\n}\n\n/**\n * Interface for representation storage backends\n */\nexport interface RepresentationStore {\n /**\n * Store content and return representation metadata\n *\n * @param content - Raw bytes to store\n * @param metadata - Representation metadata\n * @returns Complete representation info with checksum\n */\n store(content: Buffer, metadata: RepresentationMetadata): Promise<StoredRepresentation>;\n\n /**\n * Retrieve content by checksum (content-addressed lookup)\n *\n * @param checksum - Content checksum as raw hex (e.g., \"5aaa0b72...\")\n * @param mediaType - MIME type (e.g., \"text/markdown\")\n * @returns Raw bytes\n */\n retrieve(checksum: string, mediaType: string): Promise<Buffer>;\n}\n\n/**\n * Filesystem implementation of RepresentationStore\n */\nexport class FilesystemRepresentationStore implements RepresentationStore {\n private basePath: string;\n\n constructor(\n config: { basePath: string },\n projectRoot?: string\n ) {\n // If path is absolute, use it directly\n if (path.isAbsolute(config.basePath)) {\n this.basePath = config.basePath;\n }\n // If projectRoot provided, resolve relative paths against it\n else if (projectRoot) {\n this.basePath = path.resolve(projectRoot, config.basePath);\n }\n // Otherwise fall back to resolving against cwd (backward compat)\n else {\n this.basePath = path.resolve(config.basePath);\n }\n }\n\n async store(content: Buffer, metadata: RepresentationMetadata): 
Promise<StoredRepresentation> {\n // Compute checksum (raw hex) - this will be used as the content address\n const checksum = calculateChecksum(content);\n\n // Strip charset/parameters for path - only use base MIME type for directory structure\n // e.g., \"text/plain; charset=iso-8859-1\" -> \"text/plain\"\n const baseMediaType = metadata.mediaType.split(';')[0]!.trim();\n const mediaTypePath = this.encodeMediaType(baseMediaType);\n const extension = getExtensionForMimeType(baseMediaType);\n\n if (!checksum || checksum.length < 4) {\n throw new Error(`Invalid checksum: ${checksum}`);\n }\n\n // Use first 4 hex digits for sharding: 5a/aa\n const ab = checksum.substring(0, 2);\n const cd = checksum.substring(2, 4);\n\n // Build file path using raw hex checksum as filename with proper extension\n const filePath = path.join(\n this.basePath,\n 'representations',\n mediaTypePath,\n ab,\n cd,\n `rep-${checksum}${extension}`\n );\n\n // Create directory structure programmatically\n await fs.mkdir(path.dirname(filePath), { recursive: true });\n\n // Write content (idempotent - same content = same file)\n await fs.writeFile(filePath, content);\n\n return {\n '@id': checksum, // Use checksum as the ID (content-addressed)\n ...metadata,\n byteSize: content.length,\n checksum,\n created: new Date().toISOString(),\n };\n }\n\n async retrieve(checksum: string, mediaType: string): Promise<Buffer> {\n // Strip charset/parameters for path - only use base MIME type for directory lookup\n // e.g., \"text/plain; charset=iso-8859-1\" -> \"text/plain\"\n const baseMediaType = mediaType.split(';')[0]!.trim();\n const mediaTypePath = this.encodeMediaType(baseMediaType);\n const extension = getExtensionForMimeType(baseMediaType);\n\n if (!checksum || checksum.length < 4) {\n throw new Error(`Invalid checksum: ${checksum}`);\n }\n\n // Use first 4 hex digits for sharding: 5a/aa\n const ab = checksum.substring(0, 2);\n const cd = checksum.substring(2, 4);\n\n // Build file path from raw hex 
checksum with proper extension\n const filePath = path.join(\n this.basePath,\n 'representations',\n mediaTypePath,\n ab,\n cd,\n `rep-${checksum}${extension}`\n );\n\n try {\n return await fs.readFile(filePath);\n } catch (error: any) {\n if (error.code === 'ENOENT') {\n throw new Error(`Representation not found for checksum ${checksum} with mediaType ${mediaType}`);\n }\n throw error;\n }\n }\n\n /**\n * Encode media type for filesystem path\n * Replaces \"/\" with \"~1\" to avoid directory separators\n *\n * @param mediaType - MIME type (e.g., \"text/markdown\")\n * @returns Encoded path segment (e.g., \"text~1markdown\")\n */\n private encodeMediaType(mediaType: string): string {\n return mediaType.replace(/\\//g, '~1');\n }\n}\n","/**\n * MIME Type to File Extension Mapping\n *\n * Maps common MIME types to their standard file extensions.\n * Used by RepresentationStore to save files with proper extensions.\n */\n\n/**\n * Comprehensive MIME type to extension mapping\n */\nconst MIME_TO_EXTENSION: Record<string, string> = {\n // Text formats\n 'text/plain': '.txt',\n 'text/markdown': '.md',\n 'text/html': '.html',\n 'text/css': '.css',\n 'text/csv': '.csv',\n 'text/xml': '.xml',\n\n // Application formats - structured data\n 'application/json': '.json',\n 'application/xml': '.xml',\n 'application/yaml': '.yaml',\n 'application/x-yaml': '.yaml',\n\n // Application formats - documents\n 'application/pdf': '.pdf',\n 'application/msword': '.doc',\n 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',\n 'application/vnd.ms-excel': '.xls',\n 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',\n 'application/vnd.ms-powerpoint': '.ppt',\n 'application/vnd.openxmlformats-officedocument.presentationml.presentation': '.pptx',\n\n // Application formats - archives\n 'application/zip': '.zip',\n 'application/gzip': '.gz',\n 'application/x-tar': '.tar',\n 'application/x-7z-compressed': '.7z',\n\n // Application 
formats - executables/binaries\n 'application/octet-stream': '.bin',\n 'application/wasm': '.wasm',\n\n // Image formats\n 'image/png': '.png',\n 'image/jpeg': '.jpg',\n 'image/gif': '.gif',\n 'image/webp': '.webp',\n 'image/svg+xml': '.svg',\n 'image/bmp': '.bmp',\n 'image/tiff': '.tiff',\n 'image/x-icon': '.ico',\n\n // Audio formats\n 'audio/mpeg': '.mp3',\n 'audio/wav': '.wav',\n 'audio/ogg': '.ogg',\n 'audio/webm': '.webm',\n 'audio/aac': '.aac',\n 'audio/flac': '.flac',\n\n // Video formats\n 'video/mp4': '.mp4',\n 'video/mpeg': '.mpeg',\n 'video/webm': '.webm',\n 'video/ogg': '.ogv',\n 'video/quicktime': '.mov',\n 'video/x-msvideo': '.avi',\n\n // Programming languages\n 'text/javascript': '.js',\n 'application/javascript': '.js',\n 'text/x-typescript': '.ts',\n 'application/typescript': '.ts',\n 'text/x-python': '.py',\n 'text/x-java': '.java',\n 'text/x-c': '.c',\n 'text/x-c++': '.cpp',\n 'text/x-csharp': '.cs',\n 'text/x-go': '.go',\n 'text/x-rust': '.rs',\n 'text/x-ruby': '.rb',\n 'text/x-php': '.php',\n 'text/x-swift': '.swift',\n 'text/x-kotlin': '.kt',\n 'text/x-shell': '.sh',\n\n // Font formats\n 'font/woff': '.woff',\n 'font/woff2': '.woff2',\n 'font/ttf': '.ttf',\n 'font/otf': '.otf',\n};\n\n/**\n * Get file extension for a MIME type\n *\n * @param mediaType - MIME type (e.g., \"text/markdown\")\n * @returns File extension with leading dot (e.g., \".md\") or \".dat\" if unknown\n *\n * @example\n * getExtensionForMimeType('text/markdown') // => '.md'\n * getExtensionForMimeType('image/png') // => '.png'\n * getExtensionForMimeType('unknown/type') // => '.dat'\n */\nexport function getExtensionForMimeType(mediaType: string): string {\n // Normalize MIME type (lowercase, remove parameters)\n const normalized = mediaType.toLowerCase().split(';')[0]!.trim();\n\n // Look up in mapping\n const extension = MIME_TO_EXTENSION[normalized];\n\n // Return mapped extension or fallback to .dat\n return extension || '.dat';\n}\n\n/**\n * Check if a MIME type has 
a known extension mapping\n *\n * @param mediaType - MIME type to check\n * @returns true if extension is known, false if would fallback to .dat\n */\nexport function hasKnownExtension(mediaType: string): boolean {\n const normalized = mediaType.toLowerCase().split(';')[0]!.trim();\n return normalized in MIME_TO_EXTENSION;\n}\n"],"mappings":";AAkCA,SAAS,YAAY,UAAU;AAC/B,OAAO,UAAU;AACjB,SAAS,yBAAyB;;;AC1BlC,IAAM,oBAA4C;AAAA;AAAA,EAEhD,cAAc;AAAA,EACd,iBAAiB;AAAA,EACjB,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,YAAY;AAAA,EACZ,YAAY;AAAA;AAAA,EAGZ,oBAAoB;AAAA,EACpB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,sBAAsB;AAAA;AAAA,EAGtB,mBAAmB;AAAA,EACnB,sBAAsB;AAAA,EACtB,2EAA2E;AAAA,EAC3E,4BAA4B;AAAA,EAC5B,qEAAqE;AAAA,EACrE,iCAAiC;AAAA,EACjC,6EAA6E;AAAA;AAAA,EAG7E,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,qBAAqB;AAAA,EACrB,+BAA+B;AAAA;AAAA,EAG/B,4BAA4B;AAAA,EAC5B,oBAAoB;AAAA;AAAA,EAGpB,aAAa;AAAA,EACb,cAAc;AAAA,EACd,aAAa;AAAA,EACb,cAAc;AAAA,EACd,iBAAiB;AAAA,EACjB,aAAa;AAAA,EACb,cAAc;AAAA,EACd,gBAAgB;AAAA;AAAA,EAGhB,cAAc;AAAA,EACd,aAAa;AAAA,EACb,aAAa;AAAA,EACb,cAAc;AAAA,EACd,aAAa;AAAA,EACb,cAAc;AAAA;AAAA,EAGd,aAAa;AAAA,EACb,cAAc;AAAA,EACd,cAAc;AAAA,EACd,aAAa;AAAA,EACb,mBAAmB;AAAA,EACnB,mBAAmB;AAAA;AAAA,EAGnB,mBAAmB;AAAA,EACnB,0BAA0B;AAAA,EAC1B,qBAAqB;AAAA,EACrB,0BAA0B;AAAA,EAC1B,iBAAiB;AAAA,EACjB,eAAe;AAAA,EACf,YAAY;AAAA,EACZ,cAAc;AAAA,EACd,iBAAiB;AAAA,EACjB,aAAa;AAAA,EACb,eAAe;AAAA,EACf,eAAe;AAAA,EACf,cAAc;AAAA,EACd,gBAAgB;AAAA,EAChB,iBAAiB;AAAA,EACjB,gBAAgB;AAAA;AAAA,EAGhB,aAAa;AAAA,EACb,cAAc;AAAA,EACd,YAAY;AAAA,EACZ,YAAY;AACd;AAaO,SAAS,wBAAwB,WAA2B;AAEjE,QAAM,aAAa,UAAU,YAAY,EAAE,MAAM,GAAG,EAAE,CAAC,EAAG,KAAK;AAG/D,QAAM,YAAY,kBAAkB,UAAU;AAG9C,SAAO,aAAa;AACtB;AAQO,SAAS,kBAAkB,WAA4B;AAC5D,QAAM,aAAa,UAAU,YAAY,EAAE,MAAM,GAAG,EAAE,CAAC,EAAG,KAAK;AAC/D,SAAO,cAAc;AACvB;;;ADxCO,IAAM,gCAAN,MAAmE;AAAA,EAChE;AAAA,EAER,YACE,QACA,aACA;AAEA,QAAI,KAAK,WAAW,OAAO,QAAQ,GAAG;AACpC,WAAK,WAAW,OAAO;AAAA,IACzB,WAES,aAAa;AACpB,WAAK,WAAW,KAAK,QAAQ,aAAa,OAAO,QAAQ;AAAA,IAC3D,OAEK;AACH,WAAK,WAAW,KAAK,QAAQ,OAAO,QAAQ;AAAA,IAC9C;AAAA
,EACF;AAAA,EAEA,MAAM,MAAM,SAAiB,UAAiE;AAE5F,UAAM,WAAW,kBAAkB,OAAO;AAI1C,UAAM,gBAAgB,SAAS,UAAU,MAAM,GAAG,EAAE,CAAC,EAAG,KAAK;AAC7D,UAAM,gBAAgB,KAAK,gBAAgB,aAAa;AACxD,UAAM,YAAY,wBAAwB,aAAa;AAEvD,QAAI,CAAC,YAAY,SAAS,SAAS,GAAG;AACpC,YAAM,IAAI,MAAM,qBAAqB,QAAQ,EAAE;AAAA,IACjD;AAGA,UAAM,KAAK,SAAS,UAAU,GAAG,CAAC;AAClC,UAAM,KAAK,SAAS,UAAU,GAAG,CAAC;AAGlC,UAAM,WAAW,KAAK;AAAA,MACpB,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,OAAO,QAAQ,GAAG,SAAS;AAAA,IAC7B;AAGA,UAAM,GAAG,MAAM,KAAK,QAAQ,QAAQ,GAAG,EAAE,WAAW,KAAK,CAAC;AAG1D,UAAM,GAAG,UAAU,UAAU,OAAO;AAEpC,WAAO;AAAA,MACL,OAAO;AAAA;AAAA,MACP,GAAG;AAAA,MACH,UAAU,QAAQ;AAAA,MAClB;AAAA,MACA,UAAS,oBAAI,KAAK,GAAE,YAAY;AAAA,IAClC;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,UAAkB,WAAoC;AAGnE,UAAM,gBAAgB,UAAU,MAAM,GAAG,EAAE,CAAC,EAAG,KAAK;AACpD,UAAM,gBAAgB,KAAK,gBAAgB,aAAa;AACxD,UAAM,YAAY,wBAAwB,aAAa;AAEvD,QAAI,CAAC,YAAY,SAAS,SAAS,GAAG;AACpC,YAAM,IAAI,MAAM,qBAAqB,QAAQ,EAAE;AAAA,IACjD;AAGA,UAAM,KAAK,SAAS,UAAU,GAAG,CAAC;AAClC,UAAM,KAAK,SAAS,UAAU,GAAG,CAAC;AAGlC,UAAM,WAAW,KAAK;AAAA,MACpB,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,OAAO,QAAQ,GAAG,SAAS;AAAA,IAC7B;AAEA,QAAI;AACF,aAAO,MAAM,GAAG,SAAS,QAAQ;AAAA,IACnC,SAAS,OAAY;AACnB,UAAI,MAAM,SAAS,UAAU;AAC3B,cAAM,IAAI,MAAM,yCAAyC,QAAQ,mBAAmB,SAAS,EAAE;AAAA,MACjG;AACA,YAAM;AAAA,IACR;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASQ,gBAAgB,WAA2B;AACjD,WAAO,UAAU,QAAQ,OAAO,IAAI;AAAA,EACtC;AACF;","names":[]}
|
package/package.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@semiont/content",
|
|
3
|
+
"version": "0.2.28",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "Content-addressed storage for resource representations",
|
|
6
|
+
"main": "./dist/index.js",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.js"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"files": [
|
|
15
|
+
"dist",
|
|
16
|
+
"README.md"
|
|
17
|
+
],
|
|
18
|
+
"scripts": {
|
|
19
|
+
"build": "npm run typecheck && tsup",
|
|
20
|
+
"typecheck": "tsc --noEmit",
|
|
21
|
+
"clean": "rm -rf dist",
|
|
22
|
+
"test": "vitest run",
|
|
23
|
+
"test:watch": "vitest"
|
|
24
|
+
},
|
|
25
|
+
"dependencies": {
|
|
26
|
+
"@semiont/core": "*"
|
|
27
|
+
},
|
|
28
|
+
"devDependencies": {
|
|
29
|
+
"tsup": "^8.0.1",
|
|
30
|
+
"typescript": "^5.6.3"
|
|
31
|
+
},
|
|
32
|
+
"keywords": [
|
|
33
|
+
"content",
|
|
34
|
+
"storage",
|
|
35
|
+
"representation",
|
|
36
|
+
"content-addressed",
|
|
37
|
+
"deduplication",
|
|
38
|
+
"semiont"
|
|
39
|
+
],
|
|
40
|
+
"author": "The AI Alliance",
|
|
41
|
+
"license": "Apache-2.0",
|
|
42
|
+
"repository": {
|
|
43
|
+
"type": "git",
|
|
44
|
+
"url": "https://github.com/The-AI-Alliance/semiont.git",
|
|
45
|
+
"directory": "packages/content"
|
|
46
|
+
}
|
|
47
|
+
}
|