@soulcraft/brainy 5.7.4 → 5.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -0
- package/dist/brainy.d.ts +29 -0
- package/dist/brainy.js +26 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +4 -0
- package/dist/storage/baseStorage.js +5 -0
- package/dist/storage/cow/BlobStorage.d.ts +6 -0
- package/dist/storage/cow/BlobStorage.js +18 -1
- package/package.json +13 -1
package/README.md
CHANGED
|
@@ -135,6 +135,50 @@ const results = await brain.find({
|
|
|
135
135
|
|
|
136
136
|
---
|
|
137
137
|
|
|
138
|
+
## Entity Extraction (NEW in v5.7.6)
|
|
139
|
+
|
|
140
|
+
**Extract entities from text with AI-powered classification:**
|
|
141
|
+
|
|
142
|
+
```javascript
|
|
143
|
+
import { Brainy, NounType } from '@soulcraft/brainy'
|
|
144
|
+
|
|
145
|
+
const brain = new Brainy()
|
|
146
|
+
await brain.init()
|
|
147
|
+
|
|
148
|
+
// Extract all entities
|
|
149
|
+
const entities = await brain.extractEntities('John Smith founded Acme Corp in New York')
|
|
150
|
+
// Returns:
|
|
151
|
+
// [
|
|
152
|
+
// { text: 'John Smith', type: NounType.Person, confidence: 0.95 },
|
|
153
|
+
// { text: 'Acme Corp', type: NounType.Organization, confidence: 0.92 },
|
|
154
|
+
// { text: 'New York', type: NounType.Location, confidence: 0.88 }
|
|
155
|
+
// ]
|
|
156
|
+
|
|
157
|
+
// Extract with filters
|
|
158
|
+
const people = await brain.extractEntities(resume, {
|
|
159
|
+
types: [NounType.Person],
|
|
160
|
+
confidence: 0.8
|
|
161
|
+
})
|
|
162
|
+
|
|
163
|
+
// Advanced: Direct access to extractors
|
|
164
|
+
import { SmartExtractor } from '@soulcraft/brainy'
|
|
165
|
+
|
|
166
|
+
const extractor = new SmartExtractor(brain, { minConfidence: 0.7 })
|
|
167
|
+
const result = await extractor.extract('CEO', {
|
|
168
|
+
formatContext: { format: 'excel', columnHeader: 'Title' }
|
|
169
|
+
})
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
**Features:**
|
|
173
|
+
- 🎯 **4-Signal Ensemble** - ExactMatch (40%) + Embedding (35%) + Pattern (20%) + Context (5%)
|
|
174
|
+
- 📊 **Format Intelligence** - Adapts to Excel, CSV, PDF, YAML, DOCX, JSON, Markdown
|
|
175
|
+
- ⚡ **Fast** - ~15-20ms per extraction with LRU caching
|
|
176
|
+
- 🌍 **42 Types** - Person, Organization, Location, Document, and 38 more
|
|
177
|
+
|
|
178
|
+
**→ [Neural Extraction Guide](docs/neural-extraction.md)** | **[Import Preview Mode](docs/neural-extraction.md#import-preview-mode)**
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
138
182
|
## From Prototype to Planet Scale
|
|
139
183
|
|
|
140
184
|
**The same API. Zero rewrites. Any scale.**
|
package/dist/brainy.d.ts
CHANGED
|
@@ -1075,6 +1075,35 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
1075
1075
|
includeVectors?: boolean;
|
|
1076
1076
|
neuralMatching?: boolean;
|
|
1077
1077
|
}): Promise<ExtractedEntity[]>;
|
|
1078
|
+
/**
|
|
1079
|
+
* Extract entities from text (alias for extract())
|
|
1080
|
+
* v5.7.6: Added for API clarity and Workshop team request
|
|
1081
|
+
*
|
|
1082
|
+
* Uses NeuralEntityExtractor with SmartExtractor ensemble (4-signal architecture):
|
|
1083
|
+
* - ExactMatch (40%) - Dictionary lookups
|
|
1084
|
+
* - Embedding (35%) - Semantic similarity
|
|
1085
|
+
* - Pattern (20%) - Regex patterns
|
|
1086
|
+
* - Context (5%) - Contextual hints
|
|
1087
|
+
*
|
|
1088
|
+
* @param text - Text to extract entities from
|
|
1089
|
+
* @param options - Extraction options
|
|
1090
|
+
* @returns Array of extracted entities with types and confidence scores
|
|
1091
|
+
*
|
|
1092
|
+
* @example
|
|
1093
|
+
* ```typescript
|
|
1094
|
+
* const entities = await brain.extractEntities('John Smith founded Acme Corp', {
|
|
1095
|
+
* confidence: 0.7,
|
|
1096
|
+
* types: [NounType.Person, NounType.Organization],
|
|
1097
|
+
* neuralMatching: true
|
|
1098
|
+
* })
|
|
1099
|
+
* ```
|
|
1100
|
+
*/
|
|
1101
|
+
extractEntities(text: string, options?: {
|
|
1102
|
+
types?: NounType[];
|
|
1103
|
+
confidence?: number;
|
|
1104
|
+
includeVectors?: boolean;
|
|
1105
|
+
neuralMatching?: boolean;
|
|
1106
|
+
}): Promise<ExtractedEntity[]>;
|
|
1078
1107
|
/**
|
|
1079
1108
|
* Extract concepts from text
|
|
1080
1109
|
*
|
package/dist/brainy.js
CHANGED
|
@@ -2742,6 +2742,32 @@ export class Brainy {
|
|
|
2742
2742
|
}
|
|
2743
2743
|
return await this._extractor.extract(text, options);
|
|
2744
2744
|
}
|
|
2745
|
+
/**
|
|
2746
|
+
* Extract entities from text (alias for extract())
|
|
2747
|
+
* v5.7.6: Added for API clarity and Workshop team request
|
|
2748
|
+
*
|
|
2749
|
+
* Uses NeuralEntityExtractor with SmartExtractor ensemble (4-signal architecture):
|
|
2750
|
+
* - ExactMatch (40%) - Dictionary lookups
|
|
2751
|
+
* - Embedding (35%) - Semantic similarity
|
|
2752
|
+
* - Pattern (20%) - Regex patterns
|
|
2753
|
+
* - Context (5%) - Contextual hints
|
|
2754
|
+
*
|
|
2755
|
+
* @param text - Text to extract entities from
|
|
2756
|
+
* @param options - Extraction options
|
|
2757
|
+
* @returns Array of extracted entities with types and confidence scores
|
|
2758
|
+
*
|
|
2759
|
+
* @example
|
|
2760
|
+
* ```typescript
|
|
2761
|
+
* const entities = await brain.extractEntities('John Smith founded Acme Corp', {
|
|
2762
|
+
* confidence: 0.7,
|
|
2763
|
+
* types: [NounType.Person, NounType.Organization],
|
|
2764
|
+
* neuralMatching: true
|
|
2765
|
+
* })
|
|
2766
|
+
* ```
|
|
2767
|
+
*/
|
|
2768
|
+
async extractEntities(text, options) {
|
|
2769
|
+
return this.extract(text, options);
|
|
2770
|
+
}
|
|
2745
2771
|
/**
|
|
2746
2772
|
* Extract concepts from text
|
|
2747
2773
|
*
|
package/dist/index.d.ts
CHANGED
|
@@ -15,6 +15,12 @@ export { PresetName, ModelPrecision, StorageOption, FeatureSet, DistributedRole,
|
|
|
15
15
|
export { Cortex, cortex } from './cortex.js';
|
|
16
16
|
export { NeuralImport } from './cortex/neuralImport.js';
|
|
17
17
|
export type { NeuralAnalysisResult, DetectedEntity, DetectedRelationship, NeuralInsight, NeuralImportOptions } from './cortex/neuralImport.js';
|
|
18
|
+
export { NeuralEntityExtractor } from './neural/entityExtractor.js';
|
|
19
|
+
export { SmartExtractor } from './neural/SmartExtractor.js';
|
|
20
|
+
export { SmartRelationshipExtractor } from './neural/SmartRelationshipExtractor.js';
|
|
21
|
+
export type { ExtractedEntity } from './neural/entityExtractor.js';
|
|
22
|
+
export type { ExtractionResult, SmartExtractorOptions, FormatContext } from './neural/SmartExtractor.js';
|
|
23
|
+
export type { RelationshipExtractionResult, SmartRelationshipExtractorOptions } from './neural/SmartRelationshipExtractor.js';
|
|
18
24
|
import { euclideanDistance, cosineDistance, manhattanDistance, dotProductDistance } from './utils/index.js';
|
|
19
25
|
export { euclideanDistance, cosineDistance, manhattanDistance, dotProductDistance };
|
|
20
26
|
export { getBrainyVersion } from './utils/version.js';
|
package/dist/index.js
CHANGED
|
@@ -31,6 +31,10 @@ getPreset, isValidPreset, getPresetsByCategory, getAllPresetNames, getPresetDesc
|
|
|
31
31
|
export { Cortex, cortex } from './cortex.js';
|
|
32
32
|
// Export Neural Import (AI data understanding)
|
|
33
33
|
export { NeuralImport } from './cortex/neuralImport.js';
|
|
34
|
+
// Export Neural Entity Extraction (v5.7.6 - Workshop request)
|
|
35
|
+
export { NeuralEntityExtractor } from './neural/entityExtractor.js';
|
|
36
|
+
export { SmartExtractor } from './neural/SmartExtractor.js';
|
|
37
|
+
export { SmartRelationshipExtractor } from './neural/SmartRelationshipExtractor.js';
|
|
34
38
|
// Import Manager removed - use brain.import() instead (available on all Brainy instances)
|
|
35
39
|
// Augmentation types are already exported later in the file
|
|
36
40
|
// Export distance functions for convenience
|
package/dist/storage/baseStorage.js
CHANGED
|
@@ -238,6 +238,11 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
238
238
|
if (Buffer.isBuffer(data)) {
|
|
239
239
|
return data;
|
|
240
240
|
}
|
|
241
|
+
// v5.7.5: Unwrap binary data stored as {_binary: true, data: "base64..."}
|
|
242
|
+
// Fixes "Blob integrity check failed" - hash must be calculated on original content
|
|
243
|
+
if (data._binary && typeof data.data === 'string') {
|
|
244
|
+
return Buffer.from(data.data, 'base64');
|
|
245
|
+
}
|
|
241
246
|
return Buffer.from(JSON.stringify(data));
|
|
242
247
|
}
|
|
243
248
|
catch (error) {
|
package/dist/storage/cow/BlobStorage.d.ts
CHANGED
|
@@ -84,6 +84,7 @@ export declare class BlobStorage {
|
|
|
84
84
|
private stats;
|
|
85
85
|
private zstdCompress?;
|
|
86
86
|
private zstdDecompress?;
|
|
87
|
+
private compressionReady;
|
|
87
88
|
private readonly CACHE_MAX_SIZE;
|
|
88
89
|
private readonly MULTIPART_THRESHOLD;
|
|
89
90
|
private readonly COMPRESSION_THRESHOLD;
|
|
@@ -96,6 +97,11 @@ export declare class BlobStorage {
|
|
|
96
97
|
* (Avoids loading if not needed)
|
|
97
98
|
*/
|
|
98
99
|
private initCompression;
|
|
100
|
+
/**
|
|
101
|
+
* v5.7.5: Ensure compression is ready before write operations
|
|
102
|
+
* Fixes race condition where write happens before async compression init completes
|
|
103
|
+
*/
|
|
104
|
+
private ensureCompressionReady;
|
|
99
105
|
/**
|
|
100
106
|
* Compute SHA-256 hash of data
|
|
101
107
|
*
|
package/dist/storage/cow/BlobStorage.js
CHANGED
|
@@ -29,6 +29,7 @@ import { NULL_HASH, isNullHash } from './constants.js';
|
|
|
29
29
|
*/
|
|
30
30
|
export class BlobStorage {
|
|
31
31
|
constructor(adapter, options) {
|
|
32
|
+
this.compressionReady = false;
|
|
32
33
|
// Configuration
|
|
33
34
|
this.CACHE_MAX_SIZE = 100 * 1024 * 1024; // 100MB default
|
|
34
35
|
this.MULTIPART_THRESHOLD = 5 * 1024 * 1024; // 5MB
|
|
@@ -74,6 +75,16 @@ export class BlobStorage {
|
|
|
74
75
|
this.zstdDecompress = undefined;
|
|
75
76
|
}
|
|
76
77
|
}
|
|
78
|
+
/**
|
|
79
|
+
* v5.7.5: Ensure compression is ready before write operations
|
|
80
|
+
* Fixes race condition where write happens before async compression init completes
|
|
81
|
+
*/
|
|
82
|
+
async ensureCompressionReady() {
|
|
83
|
+
if (this.compressionReady)
|
|
84
|
+
return;
|
|
85
|
+
await this.initCompression();
|
|
86
|
+
this.compressionReady = true;
|
|
87
|
+
}
|
|
77
88
|
/**
|
|
78
89
|
* Compute SHA-256 hash of data
|
|
79
90
|
*
|
|
@@ -107,6 +118,9 @@ export class BlobStorage {
|
|
|
107
118
|
this.stats.dedupSavings += data.length;
|
|
108
119
|
return hash;
|
|
109
120
|
}
|
|
121
|
+
// v5.7.5: Ensure compression is initialized before writing
|
|
122
|
+
// Fixes race condition where write happens before async init completes
|
|
123
|
+
await this.ensureCompressionReady();
|
|
110
124
|
// Determine compression strategy
|
|
111
125
|
const compression = this.selectCompression(data, options);
|
|
112
126
|
// Compress if needed
|
|
@@ -117,11 +131,14 @@ export class BlobStorage {
|
|
|
117
131
|
compressedSize = finalData.length;
|
|
118
132
|
}
|
|
119
133
|
// Create metadata
|
|
134
|
+
// v5.7.5: Store ACTUAL compression state, not intended
|
|
135
|
+
// Prevents corruption if compression failed to initialize
|
|
136
|
+
const actualCompression = finalData === data ? 'none' : compression;
|
|
120
137
|
const metadata = {
|
|
121
138
|
hash,
|
|
122
139
|
size: data.length,
|
|
123
140
|
compressedSize,
|
|
124
|
-
compression,
|
|
141
|
+
compression: actualCompression,
|
|
125
142
|
type: options.type || 'blob', // CRITICAL FIX: Use 'blob' default to match storage prefix
|
|
126
143
|
createdAt: Date.now(),
|
|
127
144
|
refCount: 1
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "5.7.4",
|
|
3
|
+
"version": "5.7.6",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. Stage 3 CANONICAL: 42 nouns × 127 verbs covering 96-97% of all human knowledge.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|
|
@@ -39,6 +39,18 @@
|
|
|
39
39
|
"./universal": {
|
|
40
40
|
"import": "./dist/universal/index.js",
|
|
41
41
|
"types": "./dist/universal/index.d.ts"
|
|
42
|
+
},
|
|
43
|
+
"./neural/entityExtractor": {
|
|
44
|
+
"import": "./dist/neural/entityExtractor.js",
|
|
45
|
+
"types": "./dist/neural/entityExtractor.d.ts"
|
|
46
|
+
},
|
|
47
|
+
"./neural/SmartExtractor": {
|
|
48
|
+
"import": "./dist/neural/SmartExtractor.js",
|
|
49
|
+
"types": "./dist/neural/SmartExtractor.d.ts"
|
|
50
|
+
},
|
|
51
|
+
"./neural/SmartRelationshipExtractor": {
|
|
52
|
+
"import": "./dist/neural/SmartRelationshipExtractor.js",
|
|
53
|
+
"types": "./dist/neural/SmartRelationshipExtractor.d.ts"
|
|
42
54
|
}
|
|
43
55
|
},
|
|
44
56
|
"browser": {
|