@soulcraft/brainy 5.1.2 → 5.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,7 @@
7
7
  import crypto from 'crypto';
8
8
  import { Brainy } from '../brainy.js';
9
9
  import { NounType, VerbType } from '../types/graphTypes.js';
10
+ import { mimeDetector } from './MimeTypeDetector.js';
10
11
  import { SemanticPathResolver, ProjectionRegistry, ConceptProjection, AuthorProjection, TemporalProjection, RelationshipProjection, SimilarityProjection, TagProjection } from './semantic/index.js';
11
12
  // Knowledge Layer can remain as optional augmentation for now
12
13
  import { VFSError, VFSErrorCode } from './types.js';
@@ -34,6 +35,17 @@ export class VirtualFileSystem {
34
35
  // Default configuration (will be overridden in init)
35
36
  this.config = this.getDefaultConfig();
36
37
  }
38
+ /**
39
+ * v5.2.0: Access to BlobStorage for unified file storage. Returns the blobStorage property of this.brain's storage; throws Error when storage is missing or has no 'blobStorage' property.
40
+ */
41
+ get blobStorage() {
42
+ // TypeScript doesn't know about blobStorage on storage, use type assertion (here: bracket access to brain's storage)
43
+ const storage = this.brain['storage'];
44
+ if (!storage || !('blobStorage' in storage)) {
45
+ throw new Error('BlobStorage not available. Requires COW-enabled storage adapter.');
46
+ }
47
+ return storage.blobStorage;
48
+ }
37
49
  /**
38
50
  * Initialize the VFS
39
51
  */
@@ -176,47 +188,12 @@ export class VirtualFileSystem {
176
188
  if (entity.metadata.vfsType !== 'file') {
177
189
  throw new VFSError(VFSErrorCode.EISDIR, `Is a directory: ${path}`, path, 'readFile');
178
190
  }
179
- // Get content based on storage type
180
- let content;
181
- let isCompressed = false;
182
- if (!entity.metadata.storage || entity.metadata.storage.type === 'inline') {
183
- // Content stored in metadata for new files, or try entity data for compatibility
184
- if (entity.metadata.rawData) {
185
- // rawData is ALWAYS stored uncompressed as base64
186
- content = Buffer.from(entity.metadata.rawData, 'base64');
187
- isCompressed = false; // rawData is never compressed
188
- }
189
- else if (!entity.data) {
190
- content = Buffer.alloc(0);
191
- }
192
- else if (Buffer.isBuffer(entity.data)) {
193
- content = entity.data;
194
- isCompressed = entity.metadata.storage?.compressed || false;
195
- }
196
- else if (typeof entity.data === 'string') {
197
- content = Buffer.from(entity.data);
198
- }
199
- else {
200
- content = Buffer.from(JSON.stringify(entity.data));
201
- }
202
- }
203
- else if (entity.metadata.storage.type === 'reference') {
204
- // Content stored in external storage
205
- content = await this.readExternalContent(entity.metadata.storage.key);
206
- isCompressed = entity.metadata.storage.compressed || false;
207
- }
208
- else if (entity.metadata.storage.type === 'chunked') {
209
- // Content stored in chunks
210
- content = await this.readChunkedContent(entity.metadata.storage.chunks);
211
- isCompressed = entity.metadata.storage.compressed || false;
212
- }
213
- else {
214
- throw new VFSError(VFSErrorCode.EIO, `Unknown storage type: ${entity.metadata.storage.type}`, path, 'readFile');
215
- }
216
- // Decompress if needed (but NOT for rawData which is never compressed)
217
- if (isCompressed && options?.decompress !== false) {
218
- content = await this.decompress(content);
191
+ // v5.2.0: Unified blob storage - ONE path only
192
+ if (!entity.metadata.storage?.type || entity.metadata.storage.type !== 'blob') {
193
+ throw new VFSError(VFSErrorCode.EIO, `File has no blob storage: ${path}. Requires v5.2.0+ storage format.`, path, 'readFile');
219
194
  }
195
+ // Read from BlobStorage (handles decompression automatically)
196
+ const content = await this.blobStorage.read(entity.metadata.storage.hash);
220
197
  // Update access time
221
198
  await this.updateAccessTime(entityId);
222
199
  // Cache the content
@@ -259,36 +236,19 @@ export class VirtualFileSystem {
259
236
  // File doesn't exist, which is fine
260
237
  existingId = null;
261
238
  }
262
- // Determine storage strategy based on size
263
- let storageStrategy;
264
- let entityData = null;
265
- if (buffer.length <= (this.config.storage?.inline?.maxSize || 100000)) {
266
- // Store inline for small files
267
- storageStrategy = { type: 'inline' };
268
- entityData = buffer;
269
- }
270
- else if (buffer.length <= 10000000) {
271
- // Store as reference for medium files
272
- const key = await this.storeExternalContent(buffer);
273
- storageStrategy = { type: 'reference', key };
274
- }
275
- else {
276
- // Store as chunks for large files
277
- const chunks = await this.storeChunkedContent(buffer);
278
- storageStrategy = { type: 'chunked', chunks };
279
- }
280
- // Compress if beneficial
281
- if (this.shouldCompress(buffer) && options?.compress !== false) {
282
- const compressed = await this.compress(buffer);
283
- if (compressed.length < buffer.length * 0.9) { // Only if >10% savings
284
- storageStrategy.compressed = true;
285
- if (storageStrategy.type === 'inline') {
286
- entityData = compressed;
287
- }
288
- }
289
- }
290
- // Detect MIME type
291
- const mimeType = this.detectMimeType(name, buffer);
239
+ // v5.2.0: Unified blob storage for ALL files (no size-based branching)
240
+ // Store in BlobStorage (content-addressable, auto-deduplication, streaming)
241
+ const blobHash = await this.blobStorage.write(buffer);
242
+ // Get blob metadata (size, compression info)
243
+ const blobMetadata = await this.blobStorage.getMetadata(blobHash);
244
+ const storageStrategy = {
245
+ type: 'blob',
246
+ hash: blobHash,
247
+ size: buffer.length,
248
+ compressed: blobMetadata?.compressed
249
+ };
250
+ // Detect MIME type (v5.2.0: using comprehensive MimeTypeDetector)
251
+ const mimeType = mimeDetector.detectMimeType(name, buffer);
292
252
  // Create metadata
293
253
  const metadata = {
294
254
  path,
@@ -304,9 +264,9 @@ export class VirtualFileSystem {
304
264
  group: 'users',
305
265
  accessed: Date.now(),
306
266
  modified: Date.now(),
307
- storage: storageStrategy,
308
- // Store raw buffer data for retrieval
309
- rawData: buffer.toString('base64') // Store as base64 for safe serialization
267
+ storage: storageStrategy
268
+ // v5.2.0: No rawData - content is in BlobStorage
269
+ // NOTE: no backward compatibility here - readFile() only accepts storage.type === 'blob' and throws EIO for legacy (rawData/inline/reference/chunked) files
310
270
  };
311
271
  // Extract additional metadata if enabled
312
272
  if (this.config.intelligence?.autoExtract && options?.extractMetadata !== false) {
@@ -314,9 +274,9 @@ export class VirtualFileSystem {
314
274
  }
315
275
  if (existingId) {
316
276
  // Update existing file
277
+ // v5.2.0: No entity.data - content is in BlobStorage
317
278
  await this.brain.update({
318
279
  id: existingId,
319
- data: entityData,
320
280
  metadata
321
281
  });
322
282
  // Ensure Contains relationship exists (fix for missing relationships)
@@ -338,7 +298,7 @@ export class VirtualFileSystem {
338
298
  else {
339
299
  // Create new file entity
340
300
  // For embedding: use text content, for storage: use raw data
341
- const embeddingData = this.isTextFile(mimeType) ? buffer.toString('utf-8') : `File: ${name} (${mimeType}, ${buffer.length} bytes)`;
301
+ const embeddingData = mimeDetector.isTextFile(mimeType) ? buffer.toString('utf-8') : `File: ${name} (${mimeType}, ${buffer.length} bytes)`;
342
302
  const entity = await this.brain.add({
343
303
  data: embeddingData, // Always provide string for embeddings
344
304
  type: this.getFileNounType(mimeType),
@@ -392,14 +352,9 @@ export class VirtualFileSystem {
392
352
  if (entity.metadata.vfsType !== 'file') {
393
353
  throw new VFSError(VFSErrorCode.EISDIR, `Is a directory: ${path}`, path, 'unlink');
394
354
  }
395
- // Delete external content if needed
396
- if (entity.metadata.storage) {
397
- if (entity.metadata.storage.type === 'reference') {
398
- await this.deleteExternalContent(entity.metadata.storage.key);
399
- }
400
- else if (entity.metadata.storage.type === 'chunked') {
401
- await this.deleteChunkedContent(entity.metadata.storage.chunks);
402
- }
355
+ // v5.2.0: Delete blob from BlobStorage (decrements ref count)
356
+ if (entity.metadata.storage?.type === 'blob') {
357
+ await this.blobStorage.delete(entity.metadata.storage.hash);
403
358
  }
404
359
  // Delete the entity
405
360
  await this.brain.delete(entityId);
@@ -938,34 +893,8 @@ export class VirtualFileSystem {
938
893
  return undefined;
939
894
  return filename.substring(lastDot + 1).toLowerCase();
940
895
  }
941
- detectMimeType(filename, content) {
942
- const ext = this.getExtension(filename);
943
- // Common MIME types by extension
944
- const mimeTypes = {
945
- txt: 'text/plain',
946
- html: 'text/html',
947
- css: 'text/css',
948
- js: 'application/javascript',
949
- json: 'application/json',
950
- pdf: 'application/pdf',
951
- jpg: 'image/jpeg',
952
- jpeg: 'image/jpeg',
953
- png: 'image/png',
954
- gif: 'image/gif',
955
- mp3: 'audio/mpeg',
956
- mp4: 'video/mp4',
957
- zip: 'application/zip'
958
- };
959
- return mimeTypes[ext || ''] || 'application/octet-stream';
960
- }
961
- isTextFile(mimeType) {
962
- return mimeType.startsWith('text/') ||
963
- mimeType.includes('json') ||
964
- mimeType.includes('javascript') ||
965
- mimeType.includes('xml') ||
966
- mimeType.includes('yaml') ||
967
- mimeType === 'application/json';
968
- }
896
+ // v5.2.0: MIME detection moved to MimeTypeDetector service
897
+ // Removed detectMimeType() and isTextFile() - now using mimeDetector singleton
969
898
  getFileNounType(mimeType) {
970
899
  if (mimeType.startsWith('text/') || mimeType.includes('json')) {
971
900
  return NounType.Document;
@@ -975,122 +904,13 @@ export class VirtualFileSystem {
975
904
  }
976
905
  return NounType.File;
977
906
  }
978
- shouldCompress(buffer) {
979
- if (!this.config.storage?.compression?.enabled)
980
- return false;
981
- if (buffer.length < (this.config.storage.compression.minSize || 10000))
982
- return false;
983
- // Don't compress already compressed formats
984
- const firstBytes = buffer.slice(0, 4).toString('hex');
985
- const compressedSignatures = [
986
- '504b0304', // ZIP
987
- '1f8b', // GZIP
988
- '425a', // BZIP2
989
- '89504e47', // PNG
990
- 'ffd8ff' // JPEG
991
- ];
992
- return !compressedSignatures.some(sig => firstBytes.startsWith(sig));
993
- }
994
- // External storage methods - leverages Brainy's storage adapters (memory, file, S3, R2)
995
- async readExternalContent(key) {
996
- // Read from Brainy - Brainy's storage adapter handles retrieval
997
- const entity = await this.brain.get(key);
998
- if (!entity) {
999
- throw new Error(`External content not found: ${key}`);
1000
- }
1001
- // Content is stored in the data field
1002
- // Brainy handles storage/retrieval through its adapters (memory, file, S3, R2)
1003
- return Buffer.isBuffer(entity.data) ? entity.data : Buffer.from(entity.data);
1004
- }
1005
- async storeExternalContent(buffer) {
1006
- // Store as Brainy entity - let Brainy's storage adapter handle it
1007
- // Brainy automatically handles large data through its storage adapters (memory, file, S3, R2)
1008
- const entityId = await this.brain.add({
1009
- data: buffer, // Store actual buffer - Brainy will handle it efficiently
1010
- type: NounType.File,
1011
- metadata: {
1012
- vfsType: 'external-storage',
1013
- size: buffer.length,
1014
- created: Date.now()
1015
- }
1016
- });
1017
- return entityId;
1018
- }
1019
- async deleteExternalContent(key) {
1020
- // Delete the external storage entity
1021
- try {
1022
- await this.brain.delete(key);
1023
- }
1024
- catch (error) {
1025
- console.debug('Failed to delete external content:', key, error);
1026
- }
1027
- }
1028
- async readChunkedContent(chunks) {
1029
- // Read all chunk entities and combine
1030
- const buffers = [];
1031
- for (const chunkId of chunks) {
1032
- const entity = await this.brain.get(chunkId);
1033
- if (!entity) {
1034
- throw new Error(`Chunk not found: ${chunkId}`);
1035
- }
1036
- // Read actual data from entity - Brainy handles storage
1037
- const chunkBuffer = Buffer.isBuffer(entity.data) ? entity.data : Buffer.from(entity.data);
1038
- buffers.push(chunkBuffer);
1039
- }
1040
- return Buffer.concat(buffers);
1041
- }
1042
- async storeChunkedContent(buffer) {
1043
- const chunkSize = this.config.storage?.chunking?.chunkSize || 5000000; // 5MB chunks
1044
- const chunks = [];
1045
- for (let i = 0; i < buffer.length; i += chunkSize) {
1046
- const chunk = buffer.slice(i, Math.min(i + chunkSize, buffer.length));
1047
- // Store each chunk as a separate entity
1048
- // Let Brainy handle the chunk data efficiently
1049
- const chunkId = await this.brain.add({
1050
- data: chunk, // Store actual chunk - Brainy handles it
1051
- type: NounType.File,
1052
- metadata: {
1053
- vfsType: 'chunk',
1054
- chunkIndex: chunks.length,
1055
- size: chunk.length,
1056
- created: Date.now()
1057
- }
1058
- });
1059
- chunks.push(chunkId);
1060
- }
1061
- return chunks;
1062
- }
1063
- async deleteChunkedContent(chunks) {
1064
- // Delete all chunk entities
1065
- await Promise.all(chunks.map(chunkId => this.brain.delete(chunkId).catch(err => console.debug('Failed to delete chunk:', chunkId, err))));
1066
- }
1067
- async compress(buffer) {
1068
- const zlib = await import('zlib');
1069
- return new Promise((resolve, reject) => {
1070
- zlib.gzip(buffer, (err, compressed) => {
1071
- if (err)
1072
- reject(err);
1073
- else
1074
- resolve(compressed);
1075
- });
1076
- });
1077
- }
1078
- async decompress(buffer) {
1079
- const zlib = await import('zlib');
1080
- return new Promise((resolve, reject) => {
1081
- zlib.gunzip(buffer, (err, decompressed) => {
1082
- if (err)
1083
- reject(err);
1084
- else
1085
- resolve(decompressed);
1086
- });
1087
- });
1088
- }
907
+ // v5.2.0: Removed compression methods (shouldCompress, compress, decompress)
908
+ // BlobStorage handles all compression automatically with zstd
1089
909
  async generateEmbedding(buffer, mimeType) {
1090
910
  try {
1091
911
  // Use text content for text files, description for binary
1092
912
  let content;
1093
- if (this.isTextFile(mimeType)) {
913
+ if (mimeDetector.isTextFile(mimeType)) {
1094
914
  // Use first 10KB for embedding
1095
915
  content = buffer.toString('utf8', 0, Math.min(10240, buffer.length));
1096
916
  }
@@ -1119,7 +939,7 @@ export class VirtualFileSystem {
1119
939
  async extractMetadata(buffer, mimeType) {
1120
940
  const metadata = {};
1121
941
  // Extract basic metadata based on content type
1122
- if (this.isTextFile(mimeType)) {
942
+ if (mimeDetector.isTextFile(mimeType)) {
1123
943
  const text = buffer.toString('utf8');
1124
944
  metadata.lineCount = text.split('\n').length;
1125
945
  metadata.wordCount = text.split(/\s+/).filter(w => w).length;
@@ -86,8 +86,4 @@ export declare class DirectoryImporter {
86
86
  * Check if a path should be skipped
87
87
  */
88
88
  private shouldSkip;
89
- /**
90
- * Detect MIME type from file content and extension
91
- */
92
- private detectMimeType;
93
89
  }
@@ -279,45 +279,5 @@ export class DirectoryImporter {
279
279
  }
280
280
  return false;
281
281
  }
282
- /**
283
- * Detect MIME type from file content and extension
284
- */
285
- detectMimeType(filePath, content) {
286
- const ext = path.extname(filePath).toLowerCase();
287
- // Common extensions
288
- const mimeTypes = {
289
- '.js': 'application/javascript',
290
- '.ts': 'application/typescript',
291
- '.jsx': 'application/javascript',
292
- '.tsx': 'application/typescript',
293
- '.json': 'application/json',
294
- '.md': 'text/markdown',
295
- '.html': 'text/html',
296
- '.css': 'text/css',
297
- '.py': 'text/x-python',
298
- '.go': 'text/x-go',
299
- '.rs': 'text/x-rust',
300
- '.java': 'text/x-java',
301
- '.cpp': 'text/x-c++',
302
- '.c': 'text/x-c',
303
- '.h': 'text/x-c',
304
- '.txt': 'text/plain',
305
- '.xml': 'application/xml',
306
- '.yaml': 'text/yaml',
307
- '.yml': 'text/yaml',
308
- '.toml': 'text/toml',
309
- '.sh': 'text/x-shellscript',
310
- '.pdf': 'application/pdf',
311
- '.jpg': 'image/jpeg',
312
- '.jpeg': 'image/jpeg',
313
- '.png': 'image/png',
314
- '.gif': 'image/gif',
315
- '.svg': 'image/svg+xml',
316
- '.mp3': 'audio/mpeg',
317
- '.mp4': 'video/mp4',
318
- '.zip': 'application/zip'
319
- };
320
- return mimeTypes[ext] || 'application/octet-stream';
321
- }
322
282
  }
323
283
  //# sourceMappingURL=DirectoryImporter.js.map
@@ -7,6 +7,7 @@
7
7
  export { VirtualFileSystem } from './VirtualFileSystem.js';
8
8
  export { PathResolver } from './PathResolver.js';
9
9
  export * from './types.js';
10
+ export { MimeTypeDetector, mimeDetector } from './MimeTypeDetector.js';
10
11
  export { FSCompat, createFS } from './FSCompat.js';
11
12
  export { DirectoryImporter } from './importers/DirectoryImporter.js';
12
13
  export { VFSReadStream } from './streams/VFSReadStream.js';
package/dist/vfs/index.js CHANGED
@@ -8,6 +8,8 @@
8
8
  export { VirtualFileSystem } from './VirtualFileSystem.js';
9
9
  export { PathResolver } from './PathResolver.js';
10
10
  export * from './types.js';
11
+ // MIME Type Detection (v5.2.0)
12
+ export { MimeTypeDetector, mimeDetector } from './MimeTypeDetector.js';
11
13
  // fs compatibility layer
12
14
  export { FSCompat, createFS } from './FSCompat.js';
13
15
  // Directory import
@@ -37,9 +37,9 @@ export interface VFSMetadata {
37
37
  accessed: number;
38
38
  modified: number;
39
39
  storage?: {
40
- type: 'inline' | 'reference' | 'chunked';
41
- key?: string;
42
- chunks?: string[];
40
+ type: 'blob';
41
+ hash: string;
42
+ size: number;
43
43
  compressed?: boolean;
44
44
  };
45
45
  attributes?: Record<string, any>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@soulcraft/brainy",
3
- "version": "5.1.2",
3
+ "version": "5.2.1",
4
4
  "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",
@@ -147,7 +147,9 @@
147
147
  "@rollup/plugin-replace": "^6.0.2",
148
148
  "@rollup/plugin-terser": "^0.4.4",
149
149
  "@testcontainers/redis": "^11.5.1",
150
+ "@types/mime": "^3.0.4",
150
151
  "@types/node": "^20.11.30",
152
+ "@types/sharp": "^0.31.1",
151
153
  "@types/uuid": "^10.0.0",
152
154
  "@types/ws": "^8.18.1",
153
155
  "@typescript-eslint/eslint-plugin": "^8.0.0",
@@ -175,13 +177,16 @@
175
177
  "cli-table3": "^0.6.5",
176
178
  "commander": "^11.1.0",
177
179
  "csv-parse": "^6.1.0",
180
+ "exifr": "^7.1.3",
178
181
  "inquirer": "^12.9.3",
179
182
  "js-yaml": "^4.1.0",
180
183
  "mammoth": "^1.11.0",
184
+ "mime": "^4.1.0",
181
185
  "ora": "^8.2.0",
182
186
  "pdfjs-dist": "^4.0.379",
183
187
  "prompts": "^2.4.2",
184
188
  "roaring-wasm": "^1.1.0",
189
+ "sharp": "^0.33.5",
185
190
  "uuid": "^9.0.1",
186
191
  "ws": "^8.18.3",
187
192
  "xlsx": "^0.18.5"