@soulcraft/brainy 3.21.0 → 3.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/README.md +40 -0
- package/dist/augmentations/defaultAugmentations.d.ts +6 -0
- package/dist/augmentations/defaultAugmentations.js +12 -0
- package/dist/augmentations/intelligentImport/IntelligentImportAugmentation.d.ts +51 -0
- package/dist/augmentations/intelligentImport/IntelligentImportAugmentation.js +185 -0
- package/dist/augmentations/intelligentImport/handlers/base.d.ts +49 -0
- package/dist/augmentations/intelligentImport/handlers/base.js +149 -0
- package/dist/augmentations/intelligentImport/handlers/csvHandler.d.ts +34 -0
- package/dist/augmentations/intelligentImport/handlers/csvHandler.js +185 -0
- package/dist/augmentations/intelligentImport/handlers/excelHandler.d.ts +31 -0
- package/dist/augmentations/intelligentImport/handlers/excelHandler.js +148 -0
- package/dist/augmentations/intelligentImport/handlers/pdfHandler.d.ts +35 -0
- package/dist/augmentations/intelligentImport/handlers/pdfHandler.js +247 -0
- package/dist/augmentations/intelligentImport/index.d.ts +9 -0
- package/dist/augmentations/intelligentImport/index.js +9 -0
- package/dist/augmentations/intelligentImport/types.d.ts +111 -0
- package/dist/augmentations/intelligentImport/types.js +6 -0
- package/package.json +7 -2
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Types for Intelligent Import Augmentation
|
|
3
|
+
* Handles Excel, PDF, and CSV import with intelligent extraction
|
|
4
|
+
*/
|
|
5
|
+
export interface FormatHandler {
|
|
6
|
+
/**
|
|
7
|
+
* Format name (e.g., 'csv', 'xlsx', 'pdf')
|
|
8
|
+
*/
|
|
9
|
+
readonly format: string;
|
|
10
|
+
/**
|
|
11
|
+
* Process raw data into structured format
|
|
12
|
+
* @param data Raw file data (Buffer or string)
|
|
13
|
+
* @param options Format-specific options
|
|
14
|
+
* @returns Structured data ready for entity extraction
|
|
15
|
+
*/
|
|
16
|
+
process(data: Buffer | string, options: FormatHandlerOptions): Promise<ProcessedData>;
|
|
17
|
+
/**
|
|
18
|
+
* Detect if this handler can process the given data
|
|
19
|
+
* @param data Raw data or filename
|
|
20
|
+
* @returns true if handler supports this format
|
|
21
|
+
*/
|
|
22
|
+
canHandle(data: Buffer | string | {
|
|
23
|
+
filename?: string;
|
|
24
|
+
ext?: string;
|
|
25
|
+
}): boolean;
|
|
26
|
+
}
|
|
27
|
+
export interface FormatHandlerOptions {
|
|
28
|
+
/** Source filename (for extension detection) */
|
|
29
|
+
filename?: string;
|
|
30
|
+
/** File extension (if known) */
|
|
31
|
+
ext?: string;
|
|
32
|
+
/** Encoding (auto-detected if not specified) */
|
|
33
|
+
encoding?: string;
|
|
34
|
+
/** CSV-specific: delimiter character */
|
|
35
|
+
csvDelimiter?: string;
|
|
36
|
+
/** CSV-specific: whether first row is headers */
|
|
37
|
+
csvHeaders?: boolean;
|
|
38
|
+
/** Excel-specific: sheet names to extract (or 'all') */
|
|
39
|
+
excelSheets?: string[] | 'all';
|
|
40
|
+
/** Excel-specific: whether to evaluate formulas */
|
|
41
|
+
excelEvaluateFormulas?: boolean;
|
|
42
|
+
/** PDF-specific: whether to extract tables */
|
|
43
|
+
pdfExtractTables?: boolean;
|
|
44
|
+
/** PDF-specific: whether to preserve layout */
|
|
45
|
+
pdfPreserveLayout?: boolean;
|
|
46
|
+
/** Maximum rows to process (for large files) */
|
|
47
|
+
maxRows?: number;
|
|
48
|
+
/** Whether to stream large files */
|
|
49
|
+
streaming?: boolean;
|
|
50
|
+
}
|
|
51
|
+
export interface ProcessedData {
|
|
52
|
+
/** Format that was processed */
|
|
53
|
+
format: string;
|
|
54
|
+
/** Structured data (array of objects) */
|
|
55
|
+
data: Array<Record<string, any>>;
|
|
56
|
+
/** Metadata about the processed data */
|
|
57
|
+
metadata: {
|
|
58
|
+
/** Number of rows/entities extracted */
|
|
59
|
+
rowCount: number;
|
|
60
|
+
/** Column/field names */
|
|
61
|
+
fields: string[];
|
|
62
|
+
/** Detected encoding (for text formats) */
|
|
63
|
+
encoding?: string;
|
|
64
|
+
/** Excel: sheet names */
|
|
65
|
+
sheets?: string[];
|
|
66
|
+
/** PDF: page count */
|
|
67
|
+
pageCount?: number;
|
|
68
|
+
/** PDF: extracted text length */
|
|
69
|
+
textLength?: number;
|
|
70
|
+
/** PDF: number of tables detected */
|
|
71
|
+
tableCount?: number;
|
|
72
|
+
/** Processing time in milliseconds */
|
|
73
|
+
processingTime: number;
|
|
74
|
+
/** Any warnings during processing */
|
|
75
|
+
warnings?: string[];
|
|
76
|
+
/** Format-specific metadata */
|
|
77
|
+
[key: string]: any;
|
|
78
|
+
};
|
|
79
|
+
/** Original filename (if available) */
|
|
80
|
+
filename?: string;
|
|
81
|
+
}
|
|
82
|
+
export interface HandlerRegistry {
|
|
83
|
+
/** Registered handlers by format extension */
|
|
84
|
+
handlers: Map<string, () => Promise<FormatHandler>>;
|
|
85
|
+
/** Loaded handler instances (lazy-loaded) */
|
|
86
|
+
loaded: Map<string, FormatHandler>;
|
|
87
|
+
/** Register a new handler */
|
|
88
|
+
register(extensions: string[], loader: () => Promise<FormatHandler>): void;
|
|
89
|
+
/** Get handler for a file/format */
|
|
90
|
+
getHandler(filenameOrExt: string): Promise<FormatHandler | null>;
|
|
91
|
+
}
|
|
92
|
+
export interface IntelligentImportConfig {
|
|
93
|
+
/** Enable CSV handler */
|
|
94
|
+
enableCSV: boolean;
|
|
95
|
+
/** Enable Excel handler */
|
|
96
|
+
enableExcel: boolean;
|
|
97
|
+
/** Enable PDF handler */
|
|
98
|
+
enablePDF: boolean;
|
|
99
|
+
/** Default options for CSV */
|
|
100
|
+
csvDefaults?: Partial<FormatHandlerOptions>;
|
|
101
|
+
/** Default options for Excel */
|
|
102
|
+
excelDefaults?: Partial<FormatHandlerOptions>;
|
|
103
|
+
/** Default options for PDF */
|
|
104
|
+
pdfDefaults?: Partial<FormatHandlerOptions>;
|
|
105
|
+
/** Maximum file size to process (bytes) */
|
|
106
|
+
maxFileSize?: number;
|
|
107
|
+
/** Enable caching of processed data */
|
|
108
|
+
enableCache?: boolean;
|
|
109
|
+
/** Cache TTL in milliseconds */
|
|
110
|
+
cacheTTL?: number;
|
|
111
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "3.21.0",
|
|
3
|
+
"version": "3.22.0",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|
|
@@ -147,6 +147,7 @@
|
|
|
147
147
|
"@typescript-eslint/eslint-plugin": "^8.0.0",
|
|
148
148
|
"@typescript-eslint/parser": "^8.0.0",
|
|
149
149
|
"@vitest/coverage-v8": "^3.2.4",
|
|
150
|
+
"jspdf": "^3.0.3",
|
|
150
151
|
"minio": "^8.0.5",
|
|
151
152
|
"standard-version": "^9.5.0",
|
|
152
153
|
"testcontainers": "^11.5.1",
|
|
@@ -159,13 +160,17 @@
|
|
|
159
160
|
"@huggingface/transformers": "^3.7.2",
|
|
160
161
|
"boxen": "^8.0.1",
|
|
161
162
|
"chalk": "^5.3.0",
|
|
163
|
+
"chardet": "^2.0.0",
|
|
162
164
|
"cli-table3": "^0.6.5",
|
|
163
165
|
"commander": "^11.1.0",
|
|
166
|
+
"csv-parse": "^6.1.0",
|
|
164
167
|
"inquirer": "^12.9.3",
|
|
165
168
|
"ora": "^8.2.0",
|
|
169
|
+
"pdfjs-dist": "^4.0.379",
|
|
166
170
|
"prompts": "^2.4.2",
|
|
167
171
|
"uuid": "^9.0.1",
|
|
168
|
-
"ws": "^8.18.3"
|
|
172
|
+
"ws": "^8.18.3",
|
|
173
|
+
"xlsx": "^0.18.5"
|
|
169
174
|
},
|
|
170
175
|
"prettier": {
|
|
171
176
|
"arrowParens": "always",
|