@soulcraft/brainy 5.7.4 → 5.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -135,6 +135,50 @@ const results = await brain.find({
135
135
 
136
136
  ---
137
137
 
138
+ ## Entity Extraction (NEW in v5.7.6)
139
+
140
+ **Extract entities from text with AI-powered classification:**
141
+
142
+ ```javascript
143
+ import { Brainy, NounType } from '@soulcraft/brainy'
144
+
145
+ const brain = new Brainy()
146
+ await brain.init()
147
+
148
+ // Extract all entities
149
+ const entities = await brain.extractEntities('John Smith founded Acme Corp in New York')
150
+ // Returns:
151
+ // [
152
+ // { text: 'John Smith', type: NounType.Person, confidence: 0.95 },
153
+ // { text: 'Acme Corp', type: NounType.Organization, confidence: 0.92 },
154
+ // { text: 'New York', type: NounType.Location, confidence: 0.88 }
155
+ // ]
156
+
157
+ // Extract with filters
158
+ const people = await brain.extractEntities(resume, {
159
+ types: [NounType.Person],
160
+ confidence: 0.8
161
+ })
162
+
163
+ // Advanced: Direct access to extractors
164
+ import { SmartExtractor } from '@soulcraft/brainy'
165
+
166
+ const extractor = new SmartExtractor(brain, { minConfidence: 0.7 })
167
+ const result = await extractor.extract('CEO', {
168
+ formatContext: { format: 'excel', columnHeader: 'Title' }
169
+ })
170
+ ```
171
+
172
+ **Features:**
173
+ - 🎯 **4-Signal Ensemble** - ExactMatch (40%) + Embedding (35%) + Pattern (20%) + Context (5%)
174
+ - 📊 **Format Intelligence** - Adapts to Excel, CSV, PDF, YAML, DOCX, JSON, Markdown
175
+ - ⚡ **Fast** - ~15-20ms per extraction with LRU caching
176
+ - 🌍 **42 Types** - Person, Organization, Location, Document, and 38 more
177
+
178
+ **→ [Neural Extraction Guide](docs/neural-extraction.md)** | **[Import Preview Mode](docs/neural-extraction.md#import-preview-mode)**
179
+
180
+ ---
181
+
138
182
  ## From Prototype to Planet Scale
139
183
 
140
184
  **The same API. Zero rewrites. Any scale.**
package/dist/brainy.d.ts CHANGED
@@ -1075,6 +1075,35 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
1075
1075
  includeVectors?: boolean;
1076
1076
  neuralMatching?: boolean;
1077
1077
  }): Promise<ExtractedEntity[]>;
1078
+ /**
1079
+ * Extract entities from text (alias for extract())
1080
+ * v5.7.6: Added for API clarity, at the Workshop team's request
1081
+ *
1082
+ * Uses NeuralEntityExtractor with SmartExtractor ensemble (4-signal architecture):
1083
+ * - ExactMatch (40%) - Dictionary lookups
1084
+ * - Embedding (35%) - Semantic similarity
1085
+ * - Pattern (20%) - Regex patterns
1086
+ * - Context (5%) - Contextual hints
1087
+ *
1088
+ * @param text - Text to extract entities from
1089
+ * @param options - Extraction options
1090
+ * @returns Array of extracted entities with types and confidence scores
1091
+ *
1092
+ * @example
1093
+ * ```typescript
1094
+ * const entities = await brain.extractEntities('John Smith founded Acme Corp', {
1095
+ * confidence: 0.7,
1096
+ * types: [NounType.Person, NounType.Organization],
1097
+ * neuralMatching: true
1098
+ * })
1099
+ * ```
1100
+ */
1101
+ extractEntities(text: string, options?: {
1102
+ types?: NounType[];
1103
+ confidence?: number;
1104
+ includeVectors?: boolean;
1105
+ neuralMatching?: boolean;
1106
+ }): Promise<ExtractedEntity[]>;
1078
1107
  /**
1079
1108
  * Extract concepts from text
1080
1109
  *
package/dist/brainy.js CHANGED
@@ -2742,6 +2742,32 @@ export class Brainy {
2742
2742
  }
2743
2743
  return await this._extractor.extract(text, options);
2744
2744
  }
2745
+ /**
2746
+ * Extract entities from text (alias for extract())
2747
+ * v5.7.6: Added for API clarity, at the Workshop team's request
2748
+ *
2749
+ * Uses NeuralEntityExtractor with SmartExtractor ensemble (4-signal architecture):
2750
+ * - ExactMatch (40%) - Dictionary lookups
2751
+ * - Embedding (35%) - Semantic similarity
2752
+ * - Pattern (20%) - Regex patterns
2753
+ * - Context (5%) - Contextual hints
2754
+ *
2755
+ * @param text - Text to extract entities from
2756
+ * @param options - Extraction options
2757
+ * @returns Array of extracted entities with types and confidence scores
2758
+ *
2759
+ * @example
2760
+ * ```typescript
2761
+ * const entities = await brain.extractEntities('John Smith founded Acme Corp', {
2762
+ * confidence: 0.7,
2763
+ * types: [NounType.Person, NounType.Organization],
2764
+ * neuralMatching: true
2765
+ * })
2766
+ * ```
2767
+ */
2768
+ async extractEntities(text, options) {
2769
+ return this.extract(text, options);
2770
+ }
2745
2771
  /**
2746
2772
  * Extract concepts from text
2747
2773
  *
package/dist/index.d.ts CHANGED
@@ -15,6 +15,12 @@ export { PresetName, ModelPrecision, StorageOption, FeatureSet, DistributedRole,
15
15
  export { Cortex, cortex } from './cortex.js';
16
16
  export { NeuralImport } from './cortex/neuralImport.js';
17
17
  export type { NeuralAnalysisResult, DetectedEntity, DetectedRelationship, NeuralInsight, NeuralImportOptions } from './cortex/neuralImport.js';
18
+ export { NeuralEntityExtractor } from './neural/entityExtractor.js';
19
+ export { SmartExtractor } from './neural/SmartExtractor.js';
20
+ export { SmartRelationshipExtractor } from './neural/SmartRelationshipExtractor.js';
21
+ export type { ExtractedEntity } from './neural/entityExtractor.js';
22
+ export type { ExtractionResult, SmartExtractorOptions, FormatContext } from './neural/SmartExtractor.js';
23
+ export type { RelationshipExtractionResult, SmartRelationshipExtractorOptions } from './neural/SmartRelationshipExtractor.js';
18
24
  import { euclideanDistance, cosineDistance, manhattanDistance, dotProductDistance } from './utils/index.js';
19
25
  export { euclideanDistance, cosineDistance, manhattanDistance, dotProductDistance };
20
26
  export { getBrainyVersion } from './utils/version.js';
package/dist/index.js CHANGED
@@ -31,6 +31,10 @@ getPreset, isValidPreset, getPresetsByCategory, getAllPresetNames, getPresetDesc
31
31
  export { Cortex, cortex } from './cortex.js';
32
32
  // Export Neural Import (AI data understanding)
33
33
  export { NeuralImport } from './cortex/neuralImport.js';
34
+ // Export Neural Entity Extraction (v5.7.6 - Workshop request)
35
+ export { NeuralEntityExtractor } from './neural/entityExtractor.js';
36
+ export { SmartExtractor } from './neural/SmartExtractor.js';
37
+ export { SmartRelationshipExtractor } from './neural/SmartRelationshipExtractor.js';
34
38
  // Import Manager removed - use brain.import() instead (available on all Brainy instances)
35
39
  // Augmentation types are already exported later in the file
36
40
  // Export distance functions for convenience
@@ -238,6 +238,11 @@ export class BaseStorage extends BaseStorageAdapter {
238
238
  if (Buffer.isBuffer(data)) {
239
239
  return data;
240
240
  }
241
+ // v5.7.5: Unwrap binary data stored as {_binary: true, data: "base64..."}
242
+ // Fixes "Blob integrity check failed" - hash must be calculated on original content
243
+ if (data._binary && typeof data.data === 'string') {
244
+ return Buffer.from(data.data, 'base64');
245
+ }
241
246
  return Buffer.from(JSON.stringify(data));
242
247
  }
243
248
  catch (error) {
@@ -84,6 +84,7 @@ export declare class BlobStorage {
84
84
  private stats;
85
85
  private zstdCompress?;
86
86
  private zstdDecompress?;
87
+ private compressionReady;
87
88
  private readonly CACHE_MAX_SIZE;
88
89
  private readonly MULTIPART_THRESHOLD;
89
90
  private readonly COMPRESSION_THRESHOLD;
@@ -96,6 +97,11 @@ export declare class BlobStorage {
96
97
  * (Avoids loading if not needed)
97
98
  */
98
99
  private initCompression;
100
+ /**
101
+ * v5.7.5: Ensure compression is ready before write operations
102
+ * Fixes race condition where write happens before async compression init completes
103
+ */
104
+ private ensureCompressionReady;
99
105
  /**
100
106
  * Compute SHA-256 hash of data
101
107
  *
@@ -29,6 +29,7 @@ import { NULL_HASH, isNullHash } from './constants.js';
29
29
  */
30
30
  export class BlobStorage {
31
31
  constructor(adapter, options) {
32
+ this.compressionReady = false;
32
33
  // Configuration
33
34
  this.CACHE_MAX_SIZE = 100 * 1024 * 1024; // 100MB default
34
35
  this.MULTIPART_THRESHOLD = 5 * 1024 * 1024; // 5MB
@@ -74,6 +75,16 @@ export class BlobStorage {
74
75
  this.zstdDecompress = undefined;
75
76
  }
76
77
  }
78
+ /**
79
+ * v5.7.5: Ensure compression is ready before write operations
80
+ * Fixes race condition where write happens before async compression init completes
81
+ */
82
+ async ensureCompressionReady() {
83
+ if (this.compressionReady)
84
+ return;
85
+ await this.initCompression();
86
+ this.compressionReady = true;
87
+ }
77
88
  /**
78
89
  * Compute SHA-256 hash of data
79
90
  *
@@ -107,6 +118,9 @@ export class BlobStorage {
107
118
  this.stats.dedupSavings += data.length;
108
119
  return hash;
109
120
  }
121
+ // v5.7.5: Ensure compression is initialized before writing
122
+ // Fixes race condition where write happens before async init completes
123
+ await this.ensureCompressionReady();
110
124
  // Determine compression strategy
111
125
  const compression = this.selectCompression(data, options);
112
126
  // Compress if needed
@@ -117,11 +131,14 @@ export class BlobStorage {
117
131
  compressedSize = finalData.length;
118
132
  }
119
133
  // Create metadata
134
+ // v5.7.5: Store the ACTUAL compression state, not the intended one
135
+ // Prevents corruption if compression failed to initialize
136
+ const actualCompression = finalData === data ? 'none' : compression;
120
137
  const metadata = {
121
138
  hash,
122
139
  size: data.length,
123
140
  compressedSize,
124
- compression,
141
+ compression: actualCompression,
125
142
  type: options.type || 'blob', // CRITICAL FIX: Use 'blob' default to match storage prefix
126
143
  createdAt: Date.now(),
127
144
  refCount: 1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@soulcraft/brainy",
3
- "version": "5.7.4",
3
+ "version": "5.7.6",
4
4
  "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. Stage 3 CANONICAL: 42 nouns × 127 verbs covering 96-97% of all human knowledge.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",
@@ -39,6 +39,18 @@
39
39
  "./universal": {
40
40
  "import": "./dist/universal/index.js",
41
41
  "types": "./dist/universal/index.d.ts"
42
+ },
43
+ "./neural/entityExtractor": {
44
+ "import": "./dist/neural/entityExtractor.js",
45
+ "types": "./dist/neural/entityExtractor.d.ts"
46
+ },
47
+ "./neural/SmartExtractor": {
48
+ "import": "./dist/neural/SmartExtractor.js",
49
+ "types": "./dist/neural/SmartExtractor.d.ts"
50
+ },
51
+ "./neural/SmartRelationshipExtractor": {
52
+ "import": "./dist/neural/SmartRelationshipExtractor.js",
53
+ "types": "./dist/neural/SmartRelationshipExtractor.d.ts"
42
54
  }
43
55
  },
44
56
  "browser": {