filemayor 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,565 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * ═══════════════════════════════════════════════════════════════════
5
+ * FILEMAYOR — SOP PARSER
6
+ * Parses Standard Operating Procedure documents and converts them
7
+ * into deterministic .filemayor.yml organization rules.
8
+ *
9
+ * Two engines:
10
+ * 1. Rule-Based (offline, always available) — pattern matching
11
+ * 2. Gemini AI (online, enhanced) — natural language understanding
12
+ *
13
+ * Copyright (c) 2024-2026 FileMayor. All rights reserved.
14
+ * ═══════════════════════════════════════════════════════════════════
15
+ */
16
+
17
+ 'use strict';
18
+
19
+ const fs = require('fs');
20
+ const path = require('path');
21
+
22
+ // ─── Text Extraction ──────────────────────────────────────────────
23
+
24
/**
 * Extract text from a document file.
 * Supports: .txt, .md, .csv, .json, .yaml, .yml (read as UTF-8),
 * plus basic built-in extraction for .pdf and .docx.
 * Unknown extensions are treated as plain text (best effort).
 * @param {string} filePath - Path to document
 * @returns {Promise<string>} Extracted text
 * @throws If the file cannot be read, or a .json file contains invalid JSON
 */
async function extractText(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  // Read asynchronously — the previous readFileSync blocked the event loop
  // inside an async API. Read errors surface as a rejected promise either way.
  const buffer = await fs.promises.readFile(filePath);

  switch (ext) {
    case '.txt':
    case '.md':
    case '.csv':
    case '.yaml':
    case '.yml':
      return buffer.toString('utf-8');

    case '.json':
      // Re-serialize to normalize formatting (also validates the JSON).
      return JSON.stringify(JSON.parse(buffer.toString('utf-8')), null, 2);

    case '.pdf':
      return extractPDFText(buffer);

    case '.docx':
      return extractDOCXText(buffer);

    default:
      // Try as plain text
      return buffer.toString('utf-8');
  }
}
57
+
58
/**
 * Basic PDF text extraction (no external deps).
 * Scans BT…ET (Begin Text / End Text) sections for Tj / TJ show-text
 * operators and concatenates their string operands.
 * NOTE(review): does not decompress content streams or handle escaped
 * parentheses, so output quality depends on how the PDF was produced.
 * @param {Buffer} buffer - Raw PDF bytes
 * @returns {string} Extracted text, or a printable-character fallback
 */
function extractPDFText(buffer) {
  const raw = buffer.toString('latin1');
  const pieces = [];

  // Walk every BT…ET text section in document order.
  for (const section of raw.matchAll(/BT\s*([\s\S]*?)\s*ET/g)) {
    const body = section[1];

    // Single-string show operator: (text) Tj
    for (const tj of body.matchAll(/\(([^)]*)\)\s*Tj/g)) {
      pieces.push(tj[1]);
    }

    // Array show operator: [(a) -120 (b)] TJ — pull out each string item.
    for (const tjArr of body.matchAll(/\[([^\]]*)\]\s*TJ/g)) {
      for (const str of tjArr[1].matchAll(/\(([^)]*)\)/g)) {
        pieces.push(str[1]);
      }
    }
  }

  if (pieces.length === 0) {
    // No text operators found — fall back to any printable ASCII runs,
    // collapsing long whitespace gaps and capping the output size.
    return raw
      .replace(/[^\x20-\x7E\n\r\t]/g, ' ')
      .replace(/\s{3,}/g, '\n')
      .trim()
      .slice(0, 50000);
  }

  return pieces.join(' ').replace(/\\n/g, '\n').trim();
}
100
+
101
/**
 * Basic DOCX text extraction (no external deps).
 * DOCX is a ZIP archive; this scans the raw bytes for the
 * word/document.xml entry and pulls text out of <w:t> run elements.
 * NOTE(review): zip entries are usually deflate-compressed, in which case
 * the XML is not visible in the raw bytes — this only works when the entry
 * is stored uncompressed; confirm against real-world inputs.
 * @param {Buffer} buffer - Raw DOCX bytes
 * @returns {string} Extracted text or a bracketed diagnostic message
 */
function extractDOCXText(buffer) {
  try {
    const raw = buffer.toString('binary');

    // Locate the main document part inside the archive bytes.
    const entryName = 'word/document.xml';
    const entryPos = raw.indexOf(entryName);
    if (entryPos === -1) return '[Could not parse DOCX — missing document.xml]';

    // Grab everything from the XML declaration through </w:document>.
    const start = raw.indexOf('<?xml', entryPos);
    const end = raw.indexOf('</w:document>', start);
    if (start === -1 || end === -1) {
      return '[Could not parse DOCX XML content]';
    }
    const xml = raw.slice(start, end + '</w:document>'.length);

    // Collect the contents of every <w:t> (text run) element.
    const runs = [];
    for (const m of xml.matchAll(/<w:t[^>]*>([^<]*)<\/w:t>/g)) {
      runs.push(m[1]);
    }

    return runs.join(' ').trim() || '[No text content found in DOCX]';
  } catch {
    return '[DOCX parsing error — try converting to .txt]';
  }
}
138
+
139
+ // ─── Rule-Based Parser (Offline) ──────────────────────────────────
140
+
141
/**
 * Pattern library for detecting organization rules in natural language.
 *
 * All patterns carry the /g flag, so their lastIndex is stateful between
 * calls; consumers (see parseRuleBased) reset lastIndex to 0 before each
 * scan. Capture group 1 (and group 2 where present) holds the extracted
 * value; the quote character classes allow optional surrounding quotes
 * around captured names.
 */
const RULE_PATTERNS = {
  // Directory structure patterns — group 1 captures the folder name.
  directory: [
    /(?:create|make|set up|establish)\s+(?:a\s+)?(?:folder|directory|dir)\s+(?:called|named|for)\s+["""]?([^""".\n]+)["""]?/gi,
    /(?:folder|directory)\s*:\s*["""]?([^""".\n]+)["""]?/gi,
    /(?:organize|sort|file|move)\s+(?:into|to|under)\s+["""]?([^""".\n]+)["""]?/gi,
  ],

  // File type to folder mapping — group 1 is the type/extension word,
  // group 2 is the destination folder.
  fileTypeMapping: [
    /(?:move|place|put|file|store|organize)\s+(?:all\s+)?\.?(\w+)\s+(?:files?\s+)?(?:to|into|under|in)\s+["""]?([^""".\n]+)["""]?/gi,
    /\.(\w+)\s+(?:files?\s+)?(?:→|->|=>|goes?\s+(?:to|into)|should\s+(?:go|be\s+(?:in|placed)))\s+["""]?([^""".\n]+)["""]?/gi,
    /(\w+)\s+files?\s*(?:\([^)]*\))?\s*(?:→|->|=>|:)\s*["""]?([^""".\n]+)["""]?/gi,
  ],

  // Naming convention patterns — group 1 is the convention description
  // (keyword-classified downstream into date/category/clean/original).
  naming: [
    /(?:name|rename|naming)\s+(?:convention|format|scheme|pattern)\s*:\s*["""]?([^""".\n]+)["""]?/gi,
    /(?:files?\s+)?(?:should\s+be|must\s+be|are)\s+(?:named|renamed)\s+(?:as|with|using)\s+["""]?([^""".\n]+)["""]?/gi,
    /prefix\s+(?:with|using|by)\s+["""]?([^""".\n]+)["""]?/gi,
  ],

  // Date-based rules — boolean detection only; no capture group is used.
  dateBased: [
    /(?:organize|sort|group|arrange)\s+by\s+(?:date|year|month|quarter)/gi,
    /(?:create|use)\s+(?:date|year|month|quarterly)\s+(?:folders?|directories?)/gi,
  ],

  // Retention / cleanup rules — group 1 is the numeric amount,
  // group 2 the (possibly plural) time unit.
  cleanup: [
    /(?:delete|remove|clean|purge)\s+(?:files?\s+)?(?:older\s+than|after)\s+(\d+)\s+(days?|weeks?|months?|years?)/gi,
    /(?:retention|keep)\s+(?:period|policy)\s*:\s*(\d+)\s+(days?|weeks?|months?|years?)/gi,
    /(?:archive|compress)\s+(?:files?\s+)?(?:older\s+than|after)\s+(\d+)\s+(days?|weeks?|months?|years?)/gi,
  ],

  // Ignore patterns — group 1 is the pattern/path to leave untouched.
  ignore: [
    /(?:ignore|skip|exclude|don't\s+(?:touch|move|organize))\s+["""]?([^""".\n]+)["""]?/gi,
    /(?:protected|locked|system)\s+(?:folder|directory|files?)\s*:\s*["""]?([^""".\n]+)["""]?/gi,
  ],
};
185
+
186
/**
 * Common file type aliases: human words → lists of file extensions.
 * Singular and plural alias forms map to the same extension list.
 */
const FILE_TYPE_ALIASES = {};
{
  // [alias names, extensions] — insertion order matches the exported keys.
  const aliasGroups = [
    [['pdf', 'pdfs'], ['.pdf']],
    [['document', 'documents'], ['.pdf', '.doc', '.docx', '.txt', '.rtf', '.odt']],
    [['image', 'images'], ['.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp', '.bmp']],
    [['photo', 'photos'], ['.jpg', '.jpeg', '.png', '.raw', '.heic']],
    [['video', 'videos'], ['.mp4', '.mkv', '.avi', '.mov', '.wmv', '.webm']],
    [['audio', 'music'], ['.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a']],
    [['spreadsheet', 'spreadsheets'], ['.xls', '.xlsx', '.csv', '.ods']],
    [['presentation', 'presentations'], ['.ppt', '.pptx', '.key', '.odp']],
    [['archive', 'archives'], ['.zip', '.rar', '.7z', '.tar', '.gz']],
    [['code'], ['.js', '.ts', '.py', '.java', '.cpp', '.go', '.rs', '.rb']],
    [['executable', 'executables'], ['.exe', '.msi', '.app', '.dmg', '.deb']],
    [['font', 'fonts'], ['.ttf', '.otf', '.woff', '.woff2']],
    [['design'], ['.psd', '.ai', '.sketch', '.fig', '.xd']],
  ];
  for (const [names, exts] of aliasGroups) {
    for (const name of names) {
      // Copy per alias so each key owns an independent array instance.
      FILE_TYPE_ALIASES[name] = [...exts];
    }
  }
}
215
+
216
/**
 * Parse SOP text using rule-based pattern matching (offline).
 * Scans the text against every category in RULE_PATTERNS and accumulates
 * structured rules; confidence scales with the number of matches found.
 * @param {string} text - SOP document text
 * @returns {Object} Parsed rules: { directories, fileTypeMappings,
 *   namingConvention, dateBased, cleanup, ignore, confidence, method }
 */
function parseRuleBased(text) {
  const rules = {
    directories: [],
    fileTypeMappings: [],
    namingConvention: null,
    dateBased: false,
    cleanup: [],
    ignore: [],
    confidence: 0,
    method: 'rule-based',
  };

  let matchCount = 0;

  // Extract directory creation rules (deduplicated; sane length only).
  for (const pattern of RULE_PATTERNS.directory) {
    let match;
    pattern.lastIndex = 0; // /g regexes are stateful — always rewind first
    while ((match = pattern.exec(text)) !== null) {
      const dir = match[1].trim();
      if (dir && dir.length < 100 && !rules.directories.includes(dir)) {
        rules.directories.push(dir);
        matchCount++;
      }
    }
  }

  // Extract file type → folder mappings.
  for (const pattern of RULE_PATTERNS.fileTypeMapping) {
    let match;
    pattern.lastIndex = 0;
    while ((match = pattern.exec(text)) !== null) {
      const fileType = match[1].trim().toLowerCase();
      const folder = match[2].trim();
      if (fileType && folder && folder.length < 100) {
        // Own-property check so inherited keys (e.g. "constructor") can
        // never be mistaken for a file-type alias; unknown words are
        // treated as a literal extension.
        const extensions = Object.hasOwn(FILE_TYPE_ALIASES, fileType)
          ? FILE_TYPE_ALIASES[fileType]
          : [`.${fileType}`];
        rules.fileTypeMappings.push({
          extensions,
          folder,
          original: match[0].trim(),
        });
        matchCount++;
      }
    }
  }

  // Extract naming conventions. First match per pattern is classified by
  // keyword; a later pattern's match overwrites an earlier one.
  for (const pattern of RULE_PATTERNS.naming) {
    let match;
    pattern.lastIndex = 0;
    if ((match = pattern.exec(text)) !== null) {
      const convention = match[1].trim().toLowerCase();
      if (convention.includes('date') || convention.includes('yyyy')) {
        rules.namingConvention = 'date_prefix';
      } else if (convention.includes('category') || convention.includes('type')) {
        rules.namingConvention = 'category_prefix';
      } else if (convention.includes('clean') || convention.includes('title')) {
        rules.namingConvention = 'clean';
      } else {
        rules.namingConvention = 'original';
      }
      matchCount++;
    }
  }

  // Check for date-based organization.
  for (const pattern of RULE_PATTERNS.dateBased) {
    pattern.lastIndex = 0;
    if (pattern.test(text)) {
      rules.dateBased = true;
      matchCount++;
    }
  }

  // Extract cleanup rules. "archive"/"compress" both mean keep-but-compact;
  // the remaining verbs (delete/remove/clean/purge, retention/keep) map to
  // 'delete'.
  for (const pattern of RULE_PATTERNS.cleanup) {
    let match;
    pattern.lastIndex = 0;
    while ((match = pattern.exec(text)) !== null) {
      const verb = match[0].toLowerCase();
      rules.cleanup.push({
        // Fix: "compress … older than N days" previously fell through to
        // 'delete' even though the pattern matches archive|compress.
        action: verb.startsWith('archive') || verb.startsWith('compress')
          ? 'archive'
          : 'delete',
        amount: parseInt(match[1], 10),
        unit: match[2].replace(/s$/, ''), // normalize to singular
        original: match[0].trim(),
      });
      matchCount++;
    }
  }

  // Extract ignore patterns (deduplicated).
  for (const pattern of RULE_PATTERNS.ignore) {
    let match;
    pattern.lastIndex = 0;
    while ((match = pattern.exec(text)) !== null) {
      const ignore = match[1].trim();
      if (ignore && !rules.ignore.includes(ignore)) {
        rules.ignore.push(ignore);
        matchCount++;
      }
    }
  }

  // Confidence: 0-100 based on how many rules were found.
  rules.confidence = Math.min(100, Math.round(matchCount * 15));

  return rules;
}
328
+
329
+ // ─── Gemini AI Parser (Online, Enhanced) ──────────────────────────
330
+
331
// Model and endpoint for Google's Generative Language REST API
// (generateContent); the key is supplied by parseWithGemini at call time.
const GEMINI_MODEL = 'gemini-2.0-flash';
const GEMINI_ENDPOINT = `https://generativelanguage.googleapis.com/v1beta/models/${GEMINI_MODEL}:generateContent`;

// Prompt sent verbatim ahead of the SOP text. It instructs the model to
// emit ONLY a JSON object in the exact schema parseWithGemini extracts and
// parses — this is runtime data, so edits here change API behavior.
const GEMINI_SYSTEM_PROMPT = `You are an expert file organization consultant. You analyze Standard Operating Procedure (SOP) documents and extract precise file organization rules.

Your output MUST be a valid JSON object with this exact structure:
{
"directories": ["list", "of", "folder", "names", "to", "create"],
"fileTypeMappings": [
{"extensions": [".pdf", ".doc"], "folder": "Documents"},
{"extensions": [".jpg", ".png"], "folder": "Images"}
],
"namingConvention": "original" | "date_prefix" | "category_prefix" | "clean",
"dateBased": true | false,
"cleanup": [
{"action": "delete" | "archive", "amount": 30, "unit": "day"}
],
"ignore": ["patterns", "to", "ignore"],
"watchRules": [
{"match": "*.pdf", "action": "move", "dest": "Documents/PDFs"}
]
}

Rules:
- Extract EVERY organization rule from the SOP, no matter how implicit
- Map file types to well-known extensions (e.g., "spreadsheets" → [".xls", ".xlsx", ".csv"])
- If the SOP mentions date-based organization, set dateBased to true
- If retention periods are mentioned, add cleanup rules
- Output ONLY the JSON object, no markdown or explanation`;
360
+
361
/**
 * Parse SOP text using Google Gemini AI.
 * Sends GEMINI_SYSTEM_PROMPT plus the (possibly truncated) document to the
 * generateContent endpoint and parses the JSON object out of the reply.
 * @param {string} text - SOP document text
 * @param {string} apiKey - Gemini API key
 * @returns {Promise<Object>} AI-parsed rules (confidence 90, method 'gemini-ai')
 * @throws On missing API key, HTTP errors, or an unparseable model response
 */
async function parseWithGemini(text, apiKey) {
  if (!apiKey) {
    throw new Error(
      'Gemini API key required. Set GEMINI_API_KEY environment variable.\n' +
      'Get a free key at: https://aistudio.google.com/apikey'
    );
  }

  // Truncate very long documents to stay within token limits
  const truncated = text.length > 30000 ? text.slice(0, 30000) + '\n\n[...truncated]' : text;

  const body = {
    contents: [{
      parts: [{
        text: `${GEMINI_SYSTEM_PROMPT}\n\n--- SOP DOCUMENT START ---\n${truncated}\n--- SOP DOCUMENT END ---\n\nExtract ALL organization rules as JSON:`
      }]
    }],
    generationConfig: {
      temperature: 0.1, // near-deterministic extraction
      topP: 0.8,
      maxOutputTokens: 4096,
    }
  };

  // Use native fetch (Node 18+). Security fix: send the API key via the
  // x-goog-api-key header instead of a ?key= query parameter, so the
  // secret cannot leak through URL logging, proxies, or shell history.
  const response = await fetch(GEMINI_ENDPOINT, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': apiKey,
    },
    body: JSON.stringify(body),
  });

  if (!response.ok) {
    const err = await response.text();
    if (response.status === 429) {
      throw new Error('Gemini rate limit exceeded. Wait a moment and try again.');
    }
    if (response.status === 403) {
      throw new Error('Invalid Gemini API key. Get a free key at: https://aistudio.google.com/apikey');
    }
    throw new Error(`Gemini API error (${response.status}): ${err.slice(0, 200)}`);
  }

  const data = await response.json();
  const rawText = data?.candidates?.[0]?.content?.parts?.[0]?.text || '';

  // Extract JSON from response (Gemini may wrap it in markdown fences).
  const jsonMatch = rawText.match(/\{[\s\S]*\}/);
  if (!jsonMatch) {
    throw new Error('Gemini did not return valid JSON. Using rule-based fallback.');
  }

  try {
    const parsed = JSON.parse(jsonMatch[0]);
    parsed.confidence = 90;
    parsed.method = 'gemini-ai';
    return parsed;
  } catch {
    throw new Error('Failed to parse Gemini JSON response. Using rule-based fallback.');
  }
}
429
+
430
+ // ─── Rules → Config Converter ─────────────────────────────────────
431
+
432
/**
 * Convert parsed rules into a .filemayor.yml config string.
 * @param {Object} rules - Parsed rules from either engine
 * @returns {string} YAML config text
 */
function rulesToConfig(rules) {
  // Quote a scalar for YAML when it contains characters that would
  // otherwise be parsed as YAML syntax — e.g. '*' starts an alias, so an
  // unquoted glob like *.tmp inside a flow sequence is invalid YAML.
  // Plain-safe values (alphanumerics, dot, dash, underscore, slash) pass
  // through unquoted.
  const yamlScalar = (value) => {
    const s = String(value);
    if (/^[A-Za-z0-9._/-]+$/.test(s)) return s;
    return `"${s.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"`;
  };

  const lines = [];
  lines.push('# FileMayor Configuration');
  lines.push(`# Generated from SOP (${rules.method || 'unknown'}, confidence: ${rules.confidence || 0}%)`);
  lines.push(`# Generated on: ${new Date().toISOString()}`);
  lines.push('');
  lines.push('version: 1');
  lines.push('');

  // Organize section
  lines.push('organize:');
  lines.push(`  naming: ${rules.namingConvention || 'original'}`);
  lines.push('  duplicates: rename');

  if (rules.dateBased) {
    lines.push('  date_folders: true');
  }

  // Custom categories from file type mappings
  if (rules.fileTypeMappings && rules.fileTypeMappings.length > 0) {
    lines.push('  categories:');
    for (const mapping of rules.fileTypeMappings) {
      // Derive a YAML-safe snake_case key from the destination folder.
      const key = mapping.folder.toLowerCase().replace(/[^a-z0-9]/g, '_');
      const exts = mapping.extensions.map(e => (e.startsWith('.') ? e : `.${e}`));
      lines.push(`    ${key}: [${exts.join(', ')}]`);
    }
  }

  // Ignore patterns — quoted where needed (globs like *.tmp are invalid
  // unquoted inside a YAML flow sequence).
  if (rules.ignore && rules.ignore.length > 0) {
    lines.push(`  ignore: [${rules.ignore.map(yamlScalar).join(', ')}]`);
  }

  // Clean section (cleanup specifics are emitted as comments for review)
  if (rules.cleanup && rules.cleanup.length > 0) {
    lines.push('');
    lines.push('clean:');
    lines.push('  auto: true');
    for (const rule of rules.cleanup) {
      lines.push(`    # ${rule.action} files older than ${rule.amount} ${rule.unit}(s)`);
    }
  }

  // Watch rules
  if (rules.watchRules && rules.watchRules.length > 0) {
    lines.push('');
    lines.push('watch:');
    lines.push('  rules:');
    for (const rule of rules.watchRules) {
      lines.push(`    - match: ${yamlScalar(rule.match)}`);
      lines.push(`      action: ${rule.action}`);
      lines.push(`      dest: ${yamlScalar(rule.dest)}`);
    }
  }

  // Directories to create (informational — emitted as comments)
  if (rules.directories && rules.directories.length > 0) {
    lines.push('');
    lines.push('# Directories to create:');
    for (const dir of rules.directories) {
      lines.push(`# - ${dir}`);
    }
  }

  lines.push('');
  return lines.join('\n');
}
504
+
505
+ // ─── Main SOP Parser ──────────────────────────────────────────────
506
+
507
/**
 * Parse an SOP document and generate FileMayor rules.
 * Extracts text from the document, runs the Gemini engine when a key is
 * available (falling back to the offline rule-based parser on failure),
 * then renders the resulting rules as a .filemayor.yml config string.
 * @param {string} filePath - Path to SOP document
 * @param {Object} options
 * @param {boolean} options.useAI - Use Gemini AI (default: true if key available)
 * @param {string} options.apiKey - Gemini API key (or GEMINI_API_KEY env var)
 * @returns {Promise<Object>} { rules, config, text, method, documentPath, documentSize }
 * @throws When no meaningful text can be extracted from the document
 */
async function parseSOP(filePath, options = {}) {
  const sopText = await extractText(filePath);

  // Guard: refuse documents that yielded no meaningful text.
  if (!sopText || sopText.trim().length < 10) {
    throw new Error('Could not extract meaningful text from the document');
  }

  const key = options.apiKey || process.env.GEMINI_API_KEY || '';
  const aiEnabled = options.useAI !== false && key.length > 0;

  let parsedRules;
  let usedFallback = false;

  if (!aiEnabled) {
    parsedRules = parseRuleBased(sopText);
  } else {
    try {
      parsedRules = await parseWithGemini(sopText, key);
    } catch (err) {
      // AI engine failed — warn and degrade to the offline parser.
      console.warn(`[SOP] Gemini AI failed: ${err.message}`);
      console.warn('[SOP] Falling back to rule-based parser');
      parsedRules = parseRuleBased(sopText);
      usedFallback = true;
    }
  }

  // Short preview of the source text for display purposes.
  const preview = sopText.length > 500 ? `${sopText.slice(0, 500)}...` : sopText;

  return {
    rules: parsedRules,
    config: rulesToConfig(parsedRules),
    text: preview,
    method: usedFallback ? 'rule-based (AI fallback)' : parsedRules.method,
    documentPath: filePath,
    documentSize: sopText.length,
  };
}
554
+
555
+ // ─── Exports ──────────────────────────────────────────────────────
556
+
557
// Public API: parseSOP is the main entry point; the remaining exports are
// exposed for testing and advanced/programmatic use.
module.exports = {
  parseSOP,
  extractText,
  parseRuleBased,
  parseWithGemini,
  rulesToConfig,
  FILE_TYPE_ALIASES,
  RULE_PATTERNS,
};