@soulcraft/brainy 3.21.0 → 3.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,185 @@
1
+ /**
2
+ * CSV Format Handler
3
+ * Handles CSV files with:
4
+ * - Automatic encoding detection
5
+ * - Automatic delimiter detection
6
+ * - Whole-input synchronous parsing via csv-parse/sync (no streaming; use maxRows to cap rows)
7
+ * - Type inference
8
+ */
9
+ import { parse } from 'csv-parse/sync';
10
+ import { detect as detectEncoding } from 'chardet';
11
+ import { BaseFormatHandler } from './base.js';
12
/**
 * Format handler for delimiter-separated text (CSV/TSV and similar).
 *
 * The whole input is decoded to a string and parsed synchronously with
 * `parse` from csv-parse/sync. Encoding and delimiter are auto-detected
 * unless the caller supplies them through `options`.
 *
 * Relies on helpers inherited from BaseFormatHandler (`detectExtension`,
 * `inferFieldTypes`, `convertValue`, `createMetadata`); their exact
 * semantics are defined in ./base.js and are not restated here.
 */
export class CSVHandler extends BaseFormatHandler {
    constructor(...args) {
        super(...args);
        this.format = 'csv';
    }

    /**
     * Decide whether this handler should take the input.
     *
     * @param {Buffer|string|object} data - Raw content, or any descriptor
     *   that `detectExtension` understands.
     * @returns {boolean} True for a csv/tsv/txt extension, or when the first
     *   1024 bytes/characters look like delimited text.
     */
    canHandle(data) {
        const ext = this.detectExtension(data);
        if (ext === 'csv' || ext === 'tsv' || ext === 'txt') {
            return true;
        }
        // No recognised extension: sniff a small prefix of the content.
        if (Buffer.isBuffer(data)) {
            return this.looksLikeCSV(data.subarray(0, 1024).toString('utf-8'));
        }
        if (typeof data === 'string') {
            return this.looksLikeCSV(data.slice(0, 1024));
        }
        return false;
    }

    /**
     * Decode, parse and type-convert delimited text.
     *
     * @param {Buffer|string} data - File content.
     * @param {object} [options] - Recognised keys: `encoding`, `csvDelimiter`,
     *   `csvHeaders` (pass `false` for header-less input), `maxRows`, `filename`.
     * @returns {Promise<object>} `{ format, data, metadata, filename }`.
     * @throws {Error} "CSV parsing failed: …" when parsing or conversion
     *   fails; the original error is attached as `cause`.
     */
    async process(data, options = {}) {
        const startTime = Date.now();
        const buffer = Buffer.isBuffer(data) ? data : Buffer.from(data, 'utf-8');
        const detectedEncoding = options.encoding || this.detectEncodingSafe(buffer);
        // Only second-guess the byte order when the encoding was auto-detected;
        // an explicit caller choice is applied as given.
        const text = this.decodeText(buffer, detectedEncoding, !options.encoding);
        const delimiter = options.csvDelimiter || this.detectDelimiter(text);
        const hasHeaders = options.csvHeaders !== false;
        try {
            const records = parse(text, {
                columns: hasHeaders,
                skip_empty_lines: true,
                trim: true,
                delimiter,
                relax_column_count: true,
                to: options.maxRows,
                cast: false // type inference is done below, not by the parser
            });
            const rows = Array.isArray(records) ? records : [records];
            // Field names come from the first record only.
            const fields = rows.length > 0 ? Object.keys(rows[0]) : [];
            const types = this.inferFieldTypes(rows);
            const convertedData = rows.map((row) => {
                const converted = {};
                for (const [key, value] of Object.entries(row)) {
                    converted[key] = this.convertValue(value, types[key] || 'string');
                }
                return converted;
            });
            const processingTime = Date.now() - startTime;
            return {
                format: this.format,
                data: convertedData,
                metadata: this.createMetadata(convertedData.length, fields, processingTime, {
                    encoding: detectedEncoding,
                    delimiter,
                    hasHeaders,
                    types
                }),
                filename: options.filename
            };
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            throw new Error(`CSV parsing failed: ${reason}`, { cause: error });
        }
    }

    /**
     * Decode a buffer to text and drop a leading byte-order mark.
     *
     * @param {Buffer} buffer - Raw bytes (never mutated).
     * @param {string} encoding - Node.js Buffer encoding name.
     * @param {boolean} [autoDetected=false] - True when `encoding` came from
     *   detection rather than from the caller.
     * @returns {string} Decoded text without a leading U+FEFF.
     */
    decodeText(buffer, encoding, autoDetected = false) {
        let source = buffer;
        const startsWithBigEndianBom = buffer.length >= 2 && buffer[0] === 0xfe && buffer[1] === 0xff;
        if (autoDetected && encoding === 'utf16le' && startsWithBigEndianBom) {
            // normalizeEncoding() maps UTF-16BE onto 'utf16le' because Buffer has
            // no big-endian decoder; swap the byte pairs on a copy so that the
            // little-endian decoder reads the right code units.
            const evenLength = buffer.length - (buffer.length % 2);
            source = Buffer.from(buffer.subarray(0, evenLength)).swap16();
        }
        const text = source.toString(encoding);
        // A BOM left in place would become part of the first header name.
        return text.charCodeAt(0) === 0xfeff ? text.slice(1) : text;
    }

    /**
     * Count how many times a single-character delimiter occurs in a line.
     */
    countDelimiter(line, delimiter) {
        return line.split(delimiter).length - 1;
    }

    /**
     * Check if text looks like CSV: at least two non-blank lines whose first
     * two lines contain the same non-zero number of some candidate delimiter.
     *
     * @param {string} text - Sample of the content.
     * @returns {boolean}
     */
    looksLikeCSV(text) {
        const lines = text.split('\n').filter((line) => line.trim());
        if (lines.length < 2) {
            return false;
        }
        return [',', ';', '\t', '|'].some((delimiter) => {
            const firstCount = this.countDelimiter(lines[0], delimiter);
            return firstCount > 0 && firstCount === this.countDelimiter(lines[1], delimiter);
        });
    }

    /**
     * Detect the CSV delimiter.
     *
     * Samples the first 10 lines, and among the non-blank ones compares the
     * first line with up to four following lines. A candidate qualifies when
     * it occurs the same non-zero number of times in each compared line; the
     * qualifying candidate with the highest count wins (earlier candidate on
     * ties). Delimiters inside quoted fields are counted like any other.
     *
     * @param {string} text - Decoded file content.
     * @returns {string} One of ',', ';', '\t', '|'; ',' when nothing qualifies.
     */
    detectDelimiter(text) {
        const lines = text.split('\n').slice(0, 10).filter((line) => line.trim());
        let best = ',';
        let bestCount = 0;
        if (lines.length < 2) {
            return best;
        }
        const [firstLine, ...others] = lines;
        const compared = others.slice(0, 4);
        for (const delimiter of [',', ';', '\t', '|']) {
            const firstCount = this.countDelimiter(firstLine, delimiter);
            if (firstCount === 0) {
                continue;
            }
            const consistent = compared.every((line) => this.countDelimiter(line, delimiter) === firstCount);
            // Strict '>' keeps the earlier candidate when counts are equal.
            if (consistent && firstCount > bestCount) {
                best = delimiter;
                bestCount = firstCount;
            }
        }
        return best;
    }

    /**
     * Detect encoding safely (with fallback).
     *
     * @param {Buffer} buffer - Raw bytes.
     * @returns {string} Node.js encoding name; 'utf-8' when detection yields
     *   nothing or throws.
     */
    detectEncodingSafe(buffer) {
        try {
            const detected = detectEncoding(buffer);
            if (!detected) {
                return 'utf-8';
            }
            return this.normalizeEncoding(detected);
        }
        catch {
            // Detection is advisory only; fall back to the most common encoding.
            return 'utf-8';
        }
    }

    /**
     * Normalize encoding names to Node.js-supported encodings.
     *
     * Case, '-' and '_' are ignored. ISO-8859-1..10 and 13..16 plus
     * windows-1252 map to 'latin1' (an approximation for everything except
     * ISO-8859-1), US-ASCII to 'ascii', UTF-16LE/BE to 'utf16le'. Everything
     * else — including windows-1251, Big5, GBK, GB2312, Shift_JIS, EUC-JP and
     * EUC-KR — falls back to 'utf8' as a best effort.
     *
     * @param {string} encoding - Encoding name as reported by the detector.
     * @returns {string} 'latin1', 'ascii', 'utf16le' or 'utf8'.
     */
    normalizeEncoding(encoding) {
        const key = encoding.toLowerCase().replace(/[_-]/g, '');
        const latin1Parts = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '13', '14', '15', '16'];
        // A Map (rather than an object literal) so that names such as
        // "constructor" cannot resolve to inherited Object.prototype members.
        const nodeNames = new Map([
            ...latin1Parts.map((part) => [`iso8859${part}`, 'latin1']),
            ['windows1252', 'latin1'],
            ['usascii', 'ascii'],
            ['utf8', 'utf8'],
            ['utf16le', 'utf16le'],
            ['utf16be', 'utf16le']
        ]);
        return nodeNames.get(key) ?? 'utf8';
    }
}
185
+ //# sourceMappingURL=csvHandler.js.map
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Excel Format Handler
3
+ * Handles Excel files (.xlsx, .xls, .xlsb) with:
4
+ * - Multi-sheet extraction
5
+ * - Type inference
6
+ * - Formula evaluation
7
+ * - Metadata extraction
8
+ */
9
+ import { BaseFormatHandler } from './base.js';
10
+ import { FormatHandlerOptions, ProcessedData } from '../types.js';
11
+ export declare class ExcelHandler extends BaseFormatHandler {
12
+ readonly format = "excel";
13
+ canHandle(data: Buffer | string | {
14
+ filename?: string;
15
+ ext?: string;
16
+ }): boolean;
17
+ process(data: Buffer | string, options: FormatHandlerOptions): Promise<ProcessedData>;
18
+ /**
19
+ * Determine which sheets to process: the requested sheet names that exist in the workbook, or every sheet
20
+ */
21
+ private getSheetsToProcess;
22
+ /**
23
+ * Check if a number is likely an Excel date
24
+ * Heuristic only: accepts any positive integer below 100000 (Excel stores dates as serial day numbers)
25
+ */
26
+ private isExcelDate;
27
+ /**
28
+ * Convert an Excel date serial number to a JS Date in local time
29
+ */
30
+ private excelDateToJSDate;
31
+ }
@@ -0,0 +1,148 @@
1
+ /**
2
+ * Excel Format Handler
3
+ * Handles Excel files (.xlsx, .xls, .xlsb) with:
4
+ * - Multi-sheet extraction
5
+ * - Type inference
6
+ * - Formula evaluation
7
+ * - Metadata extraction
8
+ */
9
+ import * as XLSX from 'xlsx';
10
+ import { BaseFormatHandler } from './base.js';
11
/**
 * Format handler for Excel workbooks, read through the `xlsx` package.
 *
 * Every processed sheet is flattened into row objects keyed by the sheet's
 * first row (the headers); each row also carries the originating sheet name
 * in `_sheet`.
 *
 * Relies on helpers inherited from BaseFormatHandler (`detectExtension`,
 * `sanitizeFieldName`, `inferFieldTypes`, `convertValue`, `createMetadata`);
 * their exact semantics are defined in ./base.js.
 */
export class ExcelHandler extends BaseFormatHandler {
    constructor(...args) {
        super(...args);
        this.format = 'excel';
    }

    /**
     * @param {Buffer|string|object} data - Content or descriptor understood
     *   by `detectExtension`.
     * @returns {boolean} True when the detected extension is an Excel one.
     */
    canHandle(data) {
        const ext = this.detectExtension(data);
        return ['xlsx', 'xls', 'xlsb', 'xlsm', 'xlt', 'xltx', 'xltm'].includes(ext || '');
    }

    /**
     * Read a workbook and flatten the selected sheets into typed row objects.
     *
     * @param {Buffer|string} data - Workbook bytes (a string is treated as a
     *   binary string).
     * @param {object} [options] - Recognised keys: `excelSheets` ('all', one
     *   sheet name, or an array of names) and `filename`.
     * @returns {Promise<object>} `{ format, data, metadata, filename }`.
     * @throws {Error} "Excel parsing failed: …"; the original error is
     *   attached as `cause`.
     */
    async process(data, options = {}) {
        const startTime = Date.now();
        const buffer = Buffer.isBuffer(data) ? data : Buffer.from(data, 'binary');
        try {
            const workbook = XLSX.read(buffer, {
                type: 'buffer',
                cellDates: true,
                cellNF: true,
                cellStyles: true
            });
            const sheetsToProcess = this.getSheetsToProcess(workbook, options);
            const allData = [];
            const sheetMetadata = {};
            // Insertion-ordered union of column names across all sheets.
            const fieldNames = new Set();
            for (const sheetName of sheetsToProcess) {
                const sheet = workbook.Sheets[sheetName];
                if (!sheet) {
                    continue;
                }
                const sheetData = XLSX.utils.sheet_to_json(sheet, {
                    header: 1, // array-of-arrays; row 0 holds the headers
                    defval: null,
                    blankrows: false,
                    raw: false // formatted strings rather than raw cell values
                });
                if (sheetData.length === 0) {
                    continue;
                }
                const [headerRow, ...bodyRows] = sheetData;
                const headers = headerRow.map((h) => this.sanitizeFieldName(String(h || '')));
                if (headers.length === 0) {
                    continue;
                }
                let firstRowOfSheet = true;
                for (const row of bodyRows) {
                    const rowObj = { _sheet: sheetName };
                    headers.forEach((header, columnIndex) => {
                        let value = row[columnIndex];
                        // Only reached when a cell value arrives as a number.
                        if (value && typeof value === 'number' && this.isExcelDate(value)) {
                            value = this.excelDateToJSDate(value);
                        }
                        rowObj[header] = value === undefined ? null : value;
                    });
                    if (firstRowOfSheet) {
                        // All rows of a sheet share the same keys, so the first
                        // one is enough to learn this sheet's columns.
                        for (const key of Object.keys(rowObj)) {
                            if (key !== '_sheet') {
                                fieldNames.add(key);
                            }
                        }
                        firstRowOfSheet = false;
                    }
                    allData.push(rowObj);
                }
                sheetMetadata[sheetName] = {
                    rowCount: sheetData.length - 1, // header row excluded
                    columnCount: headers.length,
                    headers
                };
            }
            const fields = [...fieldNames];
            const types = this.inferFieldTypes(allData);
            const convertedData = allData.map((row) => {
                const converted = {};
                for (const [key, value] of Object.entries(row)) {
                    // `_sheet` is bookkeeping, not data: pass it through untouched.
                    converted[key] = key === '_sheet'
                        ? value
                        : this.convertValue(value, types[key] || 'string');
                }
                return converted;
            });
            const processingTime = Date.now() - startTime;
            return {
                format: this.format,
                data: convertedData,
                metadata: this.createMetadata(convertedData.length, fields, processingTime, {
                    sheets: sheetsToProcess,
                    sheetCount: sheetsToProcess.length,
                    sheetMetadata,
                    types,
                    workbookInfo: {
                        sheetNames: workbook.SheetNames,
                        properties: workbook.Props || {}
                    }
                }),
                filename: options.filename
            };
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            throw new Error(`Excel parsing failed: ${reason}`, { cause: error });
        }
    }

    /**
     * Determine which sheets to process.
     *
     * @param {object} workbook - Workbook exposing `SheetNames`.
     * @param {object} [options] - `excelSheets` may be 'all', a single sheet
     *   name, or an array of sheet names.
     * @returns {string[]} Requested names that exist in the workbook, in the
     *   requested order; every sheet when nothing specific was requested.
     */
    getSheetsToProcess(workbook, options) {
        const allSheets = workbook.SheetNames;
        const requested = options?.excelSheets;
        if (!requested || requested === 'all') {
            return allSheets;
        }
        // Accept a bare sheet name as shorthand for a one-element list.
        const wanted = Array.isArray(requested) ? requested : [requested];
        return wanted.filter((name) => allSheets.includes(name));
    }

    /**
     * Check if a number is likely an Excel date serial.
     *
     * Heuristic only: any positive integer below 100000 qualifies, so plain
     * integer data in that range is indistinguishable from a date.
     *
     * @param {number} value
     * @returns {boolean}
     */
    isExcelDate(value) {
        return value > 0 && value < 100000 && Number.isInteger(value);
    }

    /**
     * Convert an Excel date serial (1900 date system) to a JS Date in local time.
     *
     * The date is built from calendar components instead of adding
     * milliseconds to an epoch, so the result stays at local midnight (plus
     * the fractional time of day) even when the UTC offset on the target date
     * differs from the offset in 1899.
     *
     * Excel's 1900 system contains a fictitious 1900-02-29 (serial 60).
     * Serials from 61 on therefore count from 1899-12-30, while serials below
     * 60 count from 1899-12-31. Serial 60 itself maps to 1900-02-28.
     *
     * @param {number} excelDate - Serial day number, optionally fractional.
     * @returns {Date}
     */
    excelDateToJSDate(excelDate) {
        const msPerDay = 24 * 60 * 60 * 1000;
        const wholeDays = Math.floor(excelDate);
        const timeOfDayMs = Math.round((excelDate - wholeDays) * msPerDay);
        const dayOffset = wholeDays < 60 ? wholeDays + 1 : wholeDays;
        // Day and millisecond overflow are normalised by the Date constructor.
        return new Date(1899, 11, 30 + dayOffset, 0, 0, 0, timeOfDayMs);
    }
}
148
+ //# sourceMappingURL=excelHandler.js.map
@@ -0,0 +1,35 @@
1
+ /**
2
+ * PDF Format Handler
3
+ * Handles PDF files with:
4
+ * - Text extraction with layout preservation
5
+ * - Table detection and extraction
6
+ * - Metadata extraction (author, dates, etc.)
7
+ * - Page-by-page processing
8
+ */
9
+ import { BaseFormatHandler } from './base.js';
10
+ import { FormatHandlerOptions, ProcessedData } from '../types.js';
11
+ export declare class PDFHandler extends BaseFormatHandler {
12
+ readonly format = "pdf";
13
+ canHandle(data: Buffer | string | {
14
+ filename?: string;
15
+ ext?: string;
16
+ }): boolean;
17
+ process(data: Buffer | string, options: FormatHandlerOptions): Promise<ProcessedData>;
18
+ /**
19
+ * Group text items into lines based on Y position (lines top to bottom, items left to right)
20
+ */
21
+ private groupIntoLines;
22
+ /**
23
+ * Detect tables from lines
24
+ * A table is a run of at least 3 consecutive lines that each hold 2 or more text items
25
+ */
26
+ private detectTables;
27
+ /**
28
+ * Parse a potential table into structured rows (first line supplies the headers, cells are matched by position)
29
+ */
30
+ private parseTable;
31
+ /**
32
+ * Extract paragraphs from lines by joining consecutive non-empty lines with spaces
33
+ */
34
+ private extractParagraphs;
35
+ }
@@ -0,0 +1,247 @@
1
+ /**
2
+ * PDF Format Handler
3
+ * Handles PDF files with:
4
+ * - Text extraction with layout preservation
5
+ * - Table detection and extraction
6
+ * - Metadata extraction (author, dates, etc.)
7
+ * - Page-by-page processing
8
+ */
9
+ import * as pdfjsLib from 'pdfjs-dist/legacy/build/pdf.mjs';
10
+ import { BaseFormatHandler } from './base.js';
11
// Worker bootstrap for Node.js environments.
// Runs once at module load and leaves any existing `workerSrc` untouched.
const initializeWorker = () => {
    const current = pdfjsLib.GlobalWorkerOptions.workerSrc;
    const alreadyConfigured = typeof current !== 'undefined' && current !== '';
    if (alreadyConfigured) {
        return;
    }
    // An empty data URL avoids any file-system dependency; per the original
    // author's note this is meant to make pdfjs use its built-in fallback
    // worker (NOTE(review): confirm against the pdfjs-dist version in use).
    try {
        pdfjsLib.GlobalWorkerOptions.workerSrc = 'data:,';
    }
    catch {
        // Best effort: ignore if already set or in an incompatible environment.
    }
};
initializeWorker();
27
/**
 * Format handler for PDF documents, read through pdfjs-dist.
 *
 * For every page the positioned text items are grouped into lines; runs of
 * multi-item lines are reported as table rows and the page text is reported
 * as paragraph records.
 *
 * Relies on helpers inherited from BaseFormatHandler (`detectExtension`,
 * `sanitizeFieldName`, `createMetadata`); their exact semantics are defined
 * in ./base.js.
 */
export class PDFHandler extends BaseFormatHandler {
    constructor(...args) {
        super(...args);
        this.format = 'pdf';
    }

    /**
     * @param {Buffer|string|object} data - Content or descriptor understood
     *   by `detectExtension`.
     * @returns {boolean} True for a 'pdf' extension or a buffer that starts
     *   with the "%PDF-" magic bytes.
     */
    canHandle(data) {
        if (this.detectExtension(data) === 'pdf') {
            return true;
        }
        if (Buffer.isBuffer(data)) {
            return data.subarray(0, 5).toString('ascii') === '%PDF-';
        }
        return false;
    }

    /**
     * Extract table rows, paragraphs and document metadata from a PDF.
     *
     * @param {Buffer|string} data - PDF bytes (a string is treated as a
     *   binary string).
     * @param {object} [options] - Recognised keys: `pdfExtractTables` (pass
     *   `false` to skip table detection) and `filename`.
     * @returns {Promise<object>} `{ format, data, metadata, filename }`.
     * @throws {Error} "PDF parsing failed: …"; the original error is attached
     *   as `cause`.
     */
    async process(data, options = {}) {
        const startTime = Date.now();
        const buffer = Buffer.isBuffer(data) ? data : Buffer.from(data, 'binary');
        let loadingTask;
        try {
            loadingTask = pdfjsLib.getDocument({
                data: new Uint8Array(buffer),
                useSystemFonts: true,
                standardFontDataUrl: undefined
            });
            const pdfDoc = await loadingTask.promise;
            const metadata = await pdfDoc.getMetadata();
            const numPages = pdfDoc.numPages;
            const allData = [];
            let totalTextLength = 0;
            let detectedTables = 0;
            for (let pageNum = 1; pageNum <= numPages; pageNum++) {
                const page = await pdfDoc.getPage(pageNum);
                const textContent = await page.getTextContent();
                // transform[4] / transform[5] are used as the item's x / y position.
                const textItems = textContent.items.map((item) => ({
                    text: item.str,
                    x: item.transform[4],
                    y: item.transform[5],
                    width: item.width,
                    height: item.height
                }));
                const lines = this.groupIntoLines(textItems);
                if (options.pdfExtractTables !== false) {
                    const tables = this.detectTables(lines);
                    detectedTables += tables.length;
                    for (const table of tables) {
                        // Plain loop instead of push(...rows): no argument-count
                        // limit for very large tables.
                        for (const row of table.rows) {
                            allData.push(row);
                        }
                    }
                }
                // Paragraphs are built from ALL lines of the page, so text that
                // was also reported as table rows appears here again.
                const paragraphs = this.extractParagraphs(lines);
                paragraphs.forEach((paragraph, index) => {
                    const text = paragraph.trim();
                    if (text.length === 0) {
                        return;
                    }
                    totalTextLength += text.length;
                    allData.push({
                        _page: pageNum,
                        _type: 'paragraph',
                        _index: index,
                        text
                    });
                });
            }
            const processingTime = Date.now() - startTime;
            const fields = this.collectFieldNames(allData);
            return {
                format: this.format,
                data: allData,
                metadata: this.createMetadata(allData.length, fields, processingTime, {
                    pageCount: numPages,
                    textLength: totalTextLength,
                    tableCount: detectedTables,
                    pdfMetadata: {
                        title: metadata.info?.Title || null,
                        author: metadata.info?.Author || null,
                        subject: metadata.info?.Subject || null,
                        creator: metadata.info?.Creator || null,
                        producer: metadata.info?.Producer || null,
                        creationDate: metadata.info?.CreationDate || null,
                        modificationDate: metadata.info?.ModDate || null
                    }
                }),
                filename: options.filename
            };
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            throw new Error(`PDF parsing failed: ${reason}`, { cause: error });
        }
        finally {
            // Release the document and its worker resources on every path.
            if (loadingTask && typeof loadingTask.destroy === 'function') {
                try {
                    await loadingTask.destroy();
                }
                catch {
                    // Cleanup is best effort and must not mask the parse outcome.
                }
            }
        }
    }

    /**
     * Collect every non-metadata field name (no leading underscore) that
     * occurs in any row, in first-seen order.
     *
     * @param {object[]} rows
     * @returns {string[]}
     */
    collectFieldNames(rows) {
        const names = new Set();
        for (const row of rows) {
            for (const key of Object.keys(row)) {
                if (!key.startsWith('_')) {
                    names.add(key);
                }
            }
        }
        return [...names];
    }

    /**
     * Group text items into lines based on Y position.
     *
     * Items are visited from the highest Y downwards. A new line starts when
     * an item's Y differs from the current line's first Y by more than half
     * of that item's height. Whitespace-only items are dropped, and each
     * finished line is ordered by X.
     *
     * @param {{text: string, x: number, y: number, height: number}[]} items
     * @returns {{text: string, x: number}[][]} Non-empty lines, top to bottom.
     */
    groupIntoLines(items) {
        if (items.length === 0) {
            return [];
        }
        const byY = [...items].sort((a, b) => b.y - a.y);
        const lines = [];
        const flush = (line) => {
            if (line.length > 0) {
                line.sort((a, b) => a.x - b.x);
                lines.push(line);
            }
        };
        let currentLine = [];
        let currentY = byY[0].y;
        for (const item of byY) {
            if (Math.abs(item.y - currentY) > (item.height / 2)) {
                flush(currentLine);
                currentLine = [];
                currentY = item.y;
            }
            if (item.text.trim()) {
                currentLine.push({ text: item.text, x: item.x });
            }
        }
        flush(currentLine);
        return lines;
    }

    /**
     * Detect tables from lines.
     *
     * A table candidate is a run of consecutive lines that each contain at
     * least two items; runs of three or more lines (header + 2 rows) are
     * handed to `parseTable`.
     *
     * @param {{text: string, x: number}[][]} lines
     * @returns {{rows: object[]}[]}
     */
    detectTables(lines) {
        const tables = [];
        let run = [];
        const closeRun = () => {
            if (run.length >= 3) {
                const table = this.parseTable(run);
                if (table) {
                    tables.push(table);
                }
            }
            run = [];
        };
        for (const line of lines) {
            if (line.length >= 2) {
                run.push(line);
            }
            else {
                closeRun();
            }
        }
        // A run that reaches the end of the page still counts.
        closeRun();
        return tables;
    }

    /**
     * Parse a potential table into structured rows.
     *
     * The first line supplies the header names (via `sanitizeFieldName`).
     * Cells are matched to headers by their position within the line, not by
     * X coordinate; extra cells beyond the header count are ignored.
     *
     * @param {{text: string, x: number}[][]} lines
     * @returns {{rows: object[]}|null} Null when fewer than two lines are
     *   given or no row produced a value.
     */
    parseTable(lines) {
        if (lines.length < 2) {
            return null;
        }
        const [headerLine, ...bodyLines] = lines;
        const headers = headerLine.map((item) => this.sanitizeFieldName(item.text));
        const rows = [];
        for (const line of bodyLines) {
            const row = { _type: 'table_row' };
            const cellCount = Math.min(line.length, headers.length);
            for (let j = 0; j < cellCount; j++) {
                row[headers[j]] = line[j].text.trim() || null;
            }
            // Keep the row only if it holds more than the `_type` marker.
            if (Object.keys(row).length > 1) {
                rows.push(row);
            }
        }
        return rows.length > 0 ? { rows } : null;
    }

    /**
     * Extract paragraphs from lines.
     *
     * Consecutive non-empty lines are joined with single spaces; a line whose
     * joined text is empty ends the current paragraph.
     *
     * @param {{text: string}[][]} lines
     * @returns {string[]}
     */
    extractParagraphs(lines) {
        const paragraphs = [];
        let pending = [];
        const closeParagraph = () => {
            if (pending.length > 0) {
                paragraphs.push(pending.join(' '));
                pending = [];
            }
        };
        for (const line of lines) {
            const lineText = line.map((item) => item.text).join(' ').trim();
            if (lineText.length === 0) {
                closeParagraph();
            }
            else {
                pending.push(lineText);
            }
        }
        closeParagraph();
        return paragraphs;
    }
}
247
+ //# sourceMappingURL=pdfHandler.js.map
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Intelligent Import Module
3
+ * Exports main augmentation and types
4
+ */
5
+ export { IntelligentImportAugmentation } from './IntelligentImportAugmentation.js';
6
+ export type { FormatHandler, FormatHandlerOptions, ProcessedData, IntelligentImportConfig } from './types.js';
7
+ export { CSVHandler } from './handlers/csvHandler.js';
8
+ export { ExcelHandler } from './handlers/excelHandler.js';
9
+ export { PDFHandler } from './handlers/pdfHandler.js';
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Intelligent Import Module
3
+ * Exports main augmentation and types
4
+ */
5
+ export { IntelligentImportAugmentation } from './IntelligentImportAugmentation.js';
6
+ export { CSVHandler } from './handlers/csvHandler.js';
7
+ export { ExcelHandler } from './handlers/excelHandler.js';
8
+ export { PDFHandler } from './handlers/pdfHandler.js';
9
+ //# sourceMappingURL=index.js.map