convertit 1.0.5 → 2.0.0

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry they were published to. It is provided for informational purposes only.
Files changed (38)
  1. package/README.md +221 -6
  2. package/dist/analysis/index.d.ts +38 -0
  3. package/dist/analysis/index.d.ts.map +1 -0
  4. package/dist/batch/index.d.ts +116 -0
  5. package/dist/batch/index.d.ts.map +1 -0
  6. package/dist/converters/excel-styles.d.ts.map +1 -1
  7. package/dist/converters/index.d.ts +10 -6
  8. package/dist/converters/index.d.ts.map +1 -1
  9. package/dist/converters/pdf.d.ts.map +1 -1
  10. package/dist/core/converter.d.ts +54 -2
  11. package/dist/core/converter.d.ts.map +1 -1
  12. package/dist/core/errors.d.ts +5 -0
  13. package/dist/core/errors.d.ts.map +1 -1
  14. package/dist/core/types.d.ts +1201 -1
  15. package/dist/core/types.d.ts.map +1 -1
  16. package/dist/extractors/base.d.ts +151 -0
  17. package/dist/extractors/base.d.ts.map +1 -0
  18. package/dist/extractors/csv.d.ts +89 -0
  19. package/dist/extractors/csv.d.ts.map +1 -0
  20. package/dist/extractors/excel.d.ts +102 -0
  21. package/dist/extractors/excel.d.ts.map +1 -0
  22. package/dist/extractors/image.d.ts +94 -0
  23. package/dist/extractors/image.d.ts.map +1 -0
  24. package/dist/extractors/index.d.ts +16 -0
  25. package/dist/extractors/index.d.ts.map +1 -0
  26. package/dist/extractors/pdf.d.ts +89 -0
  27. package/dist/extractors/pdf.d.ts.map +1 -0
  28. package/dist/extractors/word.d.ts +83 -0
  29. package/dist/extractors/word.d.ts.map +1 -0
  30. package/dist/index.d.ts +8 -2
  31. package/dist/index.d.ts.map +1 -1
  32. package/dist/index.js +84776 -80182
  33. package/dist/search/index.d.ts +105 -0
  34. package/dist/search/index.d.ts.map +1 -0
  35. package/dist/streaming/index.d.ts +94 -0
  36. package/dist/streaming/index.d.ts.map +1 -0
  37. package/dist/utils/helpers.d.ts.map +1 -1
  38. package/package.json +41 -3
@@ -311,7 +311,7 @@ export interface CellStyle {
311
311
  numFmt?: string;
312
312
  }
313
313
  export interface BorderStyle {
314
- style?: 'thin' | 'medium' | 'thick' | 'dotted' | 'dashed' | 'double' | 'hair' | 'mediumDashed' | 'dashDot' | 'mediumDashDot' | 'dashDotDot' | 'slantDashDot';
314
+ style?: 'thin' | 'medium' | 'thick' | 'dotted' | 'dashed' | 'double' | 'hair' | 'mediumDashed' | 'dashDot' | 'mediumDashDot' | 'dashDotDot' | 'slantDashDot' | 'mediumDashDotDot';
315
315
  color?: string;
316
316
  }
317
317
  export type ComparisonOperator = 'equal' | 'notEqual' | 'greaterThan' | 'lessThan' | 'greaterThanOrEqual' | 'lessThanOrEqual' | 'contains' | 'notContains' | 'startsWith' | 'endsWith' | 'isEmpty' | 'isNotEmpty';
@@ -699,4 +699,1204 @@ export declare const MIME_TYPES: Record<FileFormat, string>;
699
699
  export declare const DEFAULT_PAGE_SIZES: Record<string, PageSize>;
700
700
  export declare const DEFAULT_MARGINS: PageMargins;
701
701
  export declare const DEFAULT_FONT: FontConfig;
702
+ /**
703
+ * Base extraction result interface
704
+ */
705
+ export interface ExtractionResult<T = unknown> {
706
+ success: boolean;
707
+ data: T;
708
+ format: FileFormat;
709
+ sourceFile?: string;
710
+ metadata: DocumentMetadata;
711
+ duration: number;
712
+ warnings?: string[];
713
+ errors?: string[];
714
+ }
715
+ /**
716
+ * Document metadata extracted from files
717
+ */
718
+ export interface DocumentMetadata {
719
+ title?: string;
720
+ author?: string;
721
+ creator?: string;
722
+ producer?: string;
723
+ subject?: string;
724
+ keywords?: string[];
725
+ creationDate?: Date;
726
+ modificationDate?: Date;
727
+ pageCount?: number;
728
+ wordCount?: number;
729
+ characterCount?: number;
730
+ language?: string;
731
+ encrypted?: boolean;
732
+ fileSize?: number;
733
+ version?: string;
734
+ customProperties?: Record<string, unknown>;
735
+ }
736
+ /**
737
+ * Extracted text content with positioning
738
+ */
739
+ export interface ExtractedText {
740
+ content: string;
741
+ pages?: PageText[];
742
+ paragraphs?: TextBlock[];
743
+ lines?: TextLine[];
744
+ words?: TextWord[];
745
+ statistics: TextStatistics;
746
+ }
747
+ export interface PageText {
748
+ pageNumber: number;
749
+ content: string;
750
+ paragraphs: TextBlock[];
751
+ lines: TextLine[];
752
+ boundingBox?: BoundingBox;
753
+ }
754
+ export interface TextBlock {
755
+ id: string;
756
+ content: string;
757
+ type: 'paragraph' | 'heading' | 'list' | 'code' | 'quote' | 'footnote';
758
+ level?: number;
759
+ style?: TextStyle;
760
+ boundingBox?: BoundingBox;
761
+ pageNumber?: number;
762
+ }
763
+ export interface TextLine {
764
+ content: string;
765
+ lineNumber: number;
766
+ boundingBox?: BoundingBox;
767
+ confidence?: number;
768
+ }
769
+ export interface TextWord {
770
+ content: string;
771
+ boundingBox?: BoundingBox;
772
+ confidence?: number;
773
+ fontName?: string;
774
+ fontSize?: number;
775
+ }
776
+ export interface TextStyle {
777
+ fontFamily?: string;
778
+ fontSize?: number;
779
+ fontWeight?: 'normal' | 'bold';
780
+ fontStyle?: 'normal' | 'italic';
781
+ textDecoration?: 'none' | 'underline' | 'strikethrough';
782
+ color?: string;
783
+ backgroundColor?: string;
784
+ alignment?: 'left' | 'center' | 'right' | 'justify';
785
+ lineHeight?: number;
786
+ letterSpacing?: number;
787
+ }
788
+ export interface TextStatistics {
789
+ totalCharacters: number;
790
+ totalWords: number;
791
+ totalSentences: number;
792
+ totalParagraphs: number;
793
+ totalPages: number;
794
+ averageWordsPerSentence: number;
795
+ averageCharactersPerWord: number;
796
+ readingTimeMinutes: number;
797
+ speakingTimeMinutes: number;
798
+ readabilityScores?: ReadabilityScores;
799
+ }
800
+ export interface ReadabilityScores {
801
+ fleschKincaidGrade?: number;
802
+ fleschReadingEase?: number;
803
+ gunningFog?: number;
804
+ colemanLiau?: number;
805
+ automatedReadabilityIndex?: number;
806
+ smogIndex?: number;
807
+ }
808
+ /**
809
+ * Bounding box for positioned elements
810
+ */
811
+ export interface BoundingBox {
812
+ x: number;
813
+ y: number;
814
+ width: number;
815
+ height: number;
816
+ pageNumber?: number;
817
+ }
818
+ /**
819
+ * Extracted image from document
820
+ */
821
+ export interface ExtractedImage {
822
+ id: string;
823
+ data: Buffer;
824
+ format: 'png' | 'jpg' | 'jpeg' | 'gif' | 'bmp' | 'tiff' | 'webp' | 'svg';
825
+ width: number;
826
+ height: number;
827
+ dpi?: number;
828
+ colorSpace?: 'rgb' | 'cmyk' | 'grayscale';
829
+ bitDepth?: number;
830
+ pageNumber?: number;
831
+ boundingBox?: BoundingBox;
832
+ altText?: string;
833
+ caption?: string;
834
+ metadata?: ImageMetadata;
835
+ }
836
+ export interface ImageMetadata {
837
+ exif?: ExifData;
838
+ iptc?: IptcData;
839
+ xmp?: Record<string, unknown>;
840
+ icc?: IccProfile;
841
+ }
842
+ export interface ExifData {
843
+ make?: string;
844
+ model?: string;
845
+ dateTime?: Date;
846
+ exposureTime?: string;
847
+ fNumber?: number;
848
+ iso?: number;
849
+ focalLength?: number;
850
+ gpsLatitude?: number;
851
+ gpsLongitude?: number;
852
+ orientation?: number;
853
+ software?: string;
854
+ [key: string]: unknown;
855
+ }
856
+ export interface IptcData {
857
+ title?: string;
858
+ description?: string;
859
+ keywords?: string[];
860
+ copyright?: string;
861
+ creator?: string;
862
+ city?: string;
863
+ country?: string;
864
+ [key: string]: unknown;
865
+ }
866
+ export interface IccProfile {
867
+ description?: string;
868
+ colorSpace?: string;
869
+ profileClass?: string;
870
+ }
871
+ /**
872
+ * Extracted table from document
873
+ */
874
+ export interface ExtractedTable {
875
+ id: string;
876
+ headers?: string[];
877
+ rows: TableRow[];
878
+ columnCount: number;
879
+ rowCount: number;
880
+ pageNumber?: number;
881
+ boundingBox?: BoundingBox;
882
+ style?: TableStyle;
883
+ name?: string;
884
+ summary?: string;
885
+ }
886
+ export interface TableRow {
887
+ cells: TableCell[];
888
+ isHeader?: boolean;
889
+ rowIndex: number;
890
+ }
891
+ export interface TableCell {
892
+ content: string;
893
+ value?: unknown;
894
+ rowSpan?: number;
895
+ colSpan?: number;
896
+ columnIndex: number;
897
+ style?: CellStyle;
898
+ formula?: string;
899
+ dataType?: 'string' | 'number' | 'date' | 'boolean' | 'formula' | 'error' | 'empty';
900
+ }
901
+ export interface TableStyle {
902
+ borderCollapse?: 'collapse' | 'separate';
903
+ borderColor?: string;
904
+ headerBackground?: string;
905
+ alternateRowColors?: boolean;
906
+ }
907
+ /**
908
+ * Extracted hyperlink
909
+ */
910
+ export interface ExtractedLink {
911
+ id: string;
912
+ text: string;
913
+ url: string;
914
+ type: 'external' | 'internal' | 'email' | 'phone' | 'anchor';
915
+ pageNumber?: number;
916
+ boundingBox?: BoundingBox;
917
+ isValid?: boolean;
918
+ }
919
+ /**
920
+ * Extracted annotation/comment
921
+ */
922
+ export interface ExtractedAnnotation {
923
+ id: string;
924
+ type: 'highlight' | 'underline' | 'strikeout' | 'note' | 'comment' | 'freeText' | 'stamp' | 'drawing';
925
+ content?: string;
926
+ author?: string;
927
+ createdAt?: Date;
928
+ modifiedAt?: Date;
929
+ color?: string;
930
+ pageNumber?: number;
931
+ boundingBox?: BoundingBox;
932
+ replies?: ExtractedAnnotation[];
933
+ status?: 'open' | 'resolved' | 'accepted' | 'rejected';
934
+ }
935
+ /**
936
+ * Extracted form field
937
+ */
938
+ export interface ExtractedFormField {
939
+ id: string;
940
+ name: string;
941
+ type: 'text' | 'checkbox' | 'radio' | 'select' | 'button' | 'signature' | 'date' | 'number';
942
+ value?: unknown;
943
+ options?: string[];
944
+ required?: boolean;
945
+ readOnly?: boolean;
946
+ maxLength?: number;
947
+ pageNumber?: number;
948
+ boundingBox?: BoundingBox;
949
+ validation?: FormFieldValidation;
950
+ }
951
+ export interface FormFieldValidation {
952
+ type?: 'none' | 'email' | 'phone' | 'url' | 'number' | 'date' | 'regex';
953
+ pattern?: string;
954
+ min?: number | Date;
955
+ max?: number | Date;
956
+ errorMessage?: string;
957
+ }
958
+ /**
959
+ * Extracted bookmark/outline
960
+ */
961
+ export interface ExtractedBookmark {
962
+ id: string;
963
+ title: string;
964
+ pageNumber?: number;
965
+ destination?: string;
966
+ level: number;
967
+ children?: ExtractedBookmark[];
968
+ color?: string;
969
+ isOpen?: boolean;
970
+ }
971
+ /**
972
+ * Extracted attachment/embedded file
973
+ */
974
+ export interface ExtractedAttachment {
975
+ id: string;
976
+ filename: string;
977
+ data: Buffer;
978
+ mimeType: string;
979
+ size: number;
980
+ description?: string;
981
+ createdAt?: Date;
982
+ modifiedAt?: Date;
983
+ checksum?: string;
984
+ }
985
+ /**
986
+ * General extraction options (base interface)
987
+ */
988
+ export interface ExtractionOptions {
989
+ extractText?: boolean;
990
+ extractImages?: boolean;
991
+ extractTables?: boolean;
992
+ extractMetadata?: boolean;
993
+ preserveFormatting?: boolean;
994
+ [key: string]: unknown;
995
+ }
996
+ /**
997
+ * PDF-specific extraction options
998
+ */
999
+ export interface PDFExtractionOptions {
1000
+ extractText?: boolean;
1001
+ extractImages?: boolean;
1002
+ extractTables?: boolean;
1003
+ extractLinks?: boolean;
1004
+ extractAnnotations?: boolean;
1005
+ extractForms?: boolean;
1006
+ extractBookmarks?: boolean;
1007
+ extractAttachments?: boolean;
1008
+ extractMetadata?: boolean;
1009
+ pages?: number[] | 'all';
1010
+ preserveLayout?: boolean;
1011
+ ocrIfNeeded?: boolean;
1012
+ ocrLanguage?: string | string[];
1013
+ password?: string;
1014
+ imageFormat?: 'png' | 'jpg' | 'webp';
1015
+ imageQuality?: number;
1016
+ minImageSize?: number;
1017
+ }
1018
+ /**
1019
+ * PDF extraction result
1020
+ */
1021
+ export interface PDFExtractionResult extends ExtractionResult {
1022
+ data: {
1023
+ text?: ExtractedText;
1024
+ images?: ExtractedImage[];
1025
+ tables?: ExtractedTable[];
1026
+ links?: ExtractedLink[];
1027
+ annotations?: ExtractedAnnotation[];
1028
+ formFields?: ExtractedFormField[];
1029
+ bookmarks?: ExtractedBookmark[];
1030
+ attachments?: ExtractedAttachment[];
1031
+ };
1032
+ }
1033
+ /**
1034
+ * Word-specific extraction options
1035
+ */
1036
+ export interface WordExtractionOptions {
1037
+ extractText?: boolean;
1038
+ extractImages?: boolean;
1039
+ extractTables?: boolean;
1040
+ extractStyles?: boolean;
1041
+ extractComments?: boolean;
1042
+ extractHeaders?: boolean;
1043
+ extractFooters?: boolean;
1044
+ extractFootnotes?: boolean;
1045
+ extractEndnotes?: boolean;
1046
+ extractBookmarks?: boolean;
1047
+ extractMetadata?: boolean;
1048
+ preserveFormatting?: boolean;
1049
+ includeTrackedChanges?: boolean;
1050
+ }
1051
+ /**
1052
+ * Word extraction result
1053
+ */
1054
+ export interface WordExtractionResult extends ExtractionResult {
1055
+ data: {
1056
+ text?: ExtractedText;
1057
+ images?: ExtractedImage[];
1058
+ tables?: ExtractedTable[];
1059
+ styles?: ExtractedStyle[];
1060
+ comments?: ExtractedComment[];
1061
+ headers?: ExtractedHeaderFooter[];
1062
+ footers?: ExtractedHeaderFooter[];
1063
+ footnotes?: ExtractedNote[];
1064
+ endnotes?: ExtractedNote[];
1065
+ bookmarks?: ExtractedBookmark[];
1066
+ sections?: DocumentSection[];
1067
+ };
1068
+ }
1069
+ export interface ExtractedStyle {
1070
+ id: string;
1071
+ name: string;
1072
+ type: 'paragraph' | 'character' | 'table' | 'list';
1073
+ basedOn?: string;
1074
+ font?: Partial<FontConfig>;
1075
+ paragraph?: {
1076
+ alignment?: 'left' | 'center' | 'right' | 'justify';
1077
+ spacing?: {
1078
+ before?: number;
1079
+ after?: number;
1080
+ line?: number;
1081
+ };
1082
+ indent?: {
1083
+ left?: number;
1084
+ right?: number;
1085
+ firstLine?: number;
1086
+ };
1087
+ };
1088
+ }
1089
+ export interface ExtractedComment {
1090
+ id: string;
1091
+ author: string;
1092
+ content: string;
1093
+ createdAt?: Date;
1094
+ referencedText?: string;
1095
+ replies?: ExtractedComment[];
1096
+ resolved?: boolean;
1097
+ }
1098
+ export interface ExtractedHeaderFooter {
1099
+ type: 'header' | 'footer';
1100
+ section: 'first' | 'odd' | 'even' | 'default';
1101
+ content: string;
1102
+ images?: ExtractedImage[];
1103
+ }
1104
+ export interface ExtractedNote {
1105
+ id: string;
1106
+ type: 'footnote' | 'endnote';
1107
+ referenceNumber: number;
1108
+ content: string;
1109
+ }
1110
+ export interface DocumentSection {
1111
+ id: string;
1112
+ startPage: number;
1113
+ endPage: number;
1114
+ orientation: 'portrait' | 'landscape';
1115
+ pageSize: PageSize;
1116
+ margins: PageMargins;
1117
+ columns: number;
1118
+ }
1119
+ /**
1120
+ * Excel-specific extraction options
1121
+ */
1122
+ export interface ExcelExtractionOptions {
1123
+ extractData?: boolean;
1124
+ extractFormulas?: boolean;
1125
+ extractStyles?: boolean;
1126
+ extractCharts?: boolean;
1127
+ extractImages?: boolean;
1128
+ extractComments?: boolean;
1129
+ extractNames?: boolean;
1130
+ extractValidation?: boolean;
1131
+ extractConditionalFormatting?: boolean;
1132
+ extractMetadata?: boolean;
1133
+ sheets?: string[] | number[] | 'all';
1134
+ includeHiddenSheets?: boolean;
1135
+ includeHiddenRows?: boolean;
1136
+ includeHiddenColumns?: boolean;
1137
+ evaluateFormulas?: boolean;
1138
+ dateFormat?: string;
1139
+ numberFormat?: string;
1140
+ password?: string;
1141
+ }
1142
+ /**
1143
+ * Excel extraction result
1144
+ */
1145
+ export interface ExcelExtractionResult extends ExtractionResult {
1146
+ data: {
1147
+ sheets: ExtractedSheet[];
1148
+ charts?: ExtractedChart[];
1149
+ images?: ExtractedImage[];
1150
+ names?: ExtractedName[];
1151
+ styles?: ExtractedCellStyle[];
1152
+ };
1153
+ }
1154
+ export interface ExtractedSheet {
1155
+ id: string;
1156
+ name: string;
1157
+ index: number;
1158
+ isHidden: boolean;
1159
+ data: ExtractedTable;
1160
+ mergedCells?: MergedCell[];
1161
+ comments?: ExtractedCellComment[];
1162
+ conditionalFormats?: ExtractedConditionalFormat[];
1163
+ dataValidations?: ExtractedDataValidation[];
1164
+ freezePane?: {
1165
+ row: number;
1166
+ column: number;
1167
+ };
1168
+ autoFilter?: {
1169
+ range: string;
1170
+ };
1171
+ }
1172
+ export interface MergedCell {
1173
+ startRow: number;
1174
+ startColumn: number;
1175
+ endRow: number;
1176
+ endColumn: number;
1177
+ }
1178
+ export interface ExtractedCellComment {
1179
+ cell: string;
1180
+ author?: string;
1181
+ content: string;
1182
+ isResolved?: boolean;
1183
+ }
1184
+ export interface ExtractedConditionalFormat {
1185
+ range: string;
1186
+ type: string;
1187
+ priority: number;
1188
+ formula?: string;
1189
+ style?: CellStyle;
1190
+ }
1191
+ export interface ExtractedDataValidation {
1192
+ range: string;
1193
+ type: 'list' | 'whole' | 'decimal' | 'date' | 'time' | 'textLength' | 'custom';
1194
+ operator?: string;
1195
+ formula1?: string;
1196
+ formula2?: string;
1197
+ allowedValues?: unknown[];
1198
+ showDropdown?: boolean;
1199
+ showErrorMessage?: boolean;
1200
+ errorMessage?: string;
1201
+ }
1202
+ export interface ExtractedChart {
1203
+ id: string;
1204
+ name: string;
1205
+ type: string;
1206
+ title?: string;
1207
+ sheet: string;
1208
+ position: BoundingBox;
1209
+ data: {
1210
+ categories?: string[];
1211
+ series: ChartSeries[];
1212
+ };
1213
+ }
1214
+ export interface ChartSeries {
1215
+ name: string;
1216
+ values: number[];
1217
+ color?: string;
1218
+ }
1219
+ export interface ExtractedName {
1220
+ name: string;
1221
+ value: string;
1222
+ scope: string | 'workbook';
1223
+ comment?: string;
1224
+ }
1225
+ export interface ExtractedCellStyle {
1226
+ id: string;
1227
+ name?: string;
1228
+ font?: Partial<FontConfig>;
1229
+ fill?: {
1230
+ type: string;
1231
+ color?: string;
1232
+ };
1233
+ border?: Record<string, BorderStyle>;
1234
+ alignment?: Record<string, unknown>;
1235
+ numFmt?: string;
1236
+ }
1237
+ /**
1238
+ * CSV-specific extraction options
1239
+ */
1240
+ export interface CSVExtractionOptions {
1241
+ delimiter?: string;
1242
+ quote?: string;
1243
+ escape?: string;
1244
+ hasHeaders?: boolean;
1245
+ encoding?: BufferEncoding;
1246
+ skipEmptyLines?: boolean;
1247
+ trimFields?: boolean;
1248
+ maxRows?: number;
1249
+ columns?: string[] | number[];
1250
+ transformValues?: boolean;
1251
+ }
1252
+ /**
1253
+ * CSV extraction result
1254
+ */
1255
+ export interface CSVExtractionResult extends ExtractionResult {
1256
+ data: {
1257
+ headers?: string[];
1258
+ rows: unknown[][];
1259
+ records: Record<string, unknown>[];
1260
+ statistics: CSVStatistics;
1261
+ };
1262
+ }
1263
+ export interface CSVStatistics {
1264
+ rowCount: number;
1265
+ columnCount: number;
1266
+ emptyRowCount: number;
1267
+ duplicateRowCount: number;
1268
+ columnTypes: Record<string, 'string' | 'number' | 'date' | 'boolean' | 'mixed'>;
1269
+ nullCounts: Record<string, number>;
1270
+ uniqueCounts: Record<string, number>;
1271
+ }
1272
+ /**
1273
+ * Image-specific extraction options
1274
+ */
1275
+ export interface ImageExtractionOptions {
1276
+ extractMetadata?: boolean;
1277
+ extractExif?: boolean;
1278
+ extractIptc?: boolean;
1279
+ extractXmp?: boolean;
1280
+ extractColors?: boolean;
1281
+ extractText?: boolean;
1282
+ ocrLanguage?: string | string[];
1283
+ colorCount?: number;
1284
+ analyzeFaces?: boolean;
1285
+ analyzeObjects?: boolean;
1286
+ }
1287
+ /**
1288
+ * Image extraction result
1289
+ */
1290
+ export interface ImageExtractionResult extends ExtractionResult {
1291
+ data: {
1292
+ dimensions: {
1293
+ width: number;
1294
+ height: number;
1295
+ };
1296
+ format: string;
1297
+ colorSpace: string;
1298
+ bitDepth: number;
1299
+ hasAlpha: boolean;
1300
+ isAnimated: boolean;
1301
+ frameCount?: number;
1302
+ metadata?: ImageMetadata;
1303
+ dominantColors?: DominantColor[];
1304
+ text?: ExtractedText;
1305
+ faces?: DetectedFace[];
1306
+ objects?: DetectedObject[];
1307
+ };
1308
+ }
1309
+ export interface DominantColor {
1310
+ color: string;
1311
+ hex: string;
1312
+ rgb: {
1313
+ r: number;
1314
+ g: number;
1315
+ b: number;
1316
+ };
1317
+ percentage: number;
1318
+ name?: string;
1319
+ }
1320
+ export interface DetectedFace {
1321
+ boundingBox: BoundingBox;
1322
+ confidence: number;
1323
+ landmarks?: FaceLandmark[];
1324
+ attributes?: FaceAttributes;
1325
+ }
1326
+ export interface FaceLandmark {
1327
+ type: string;
1328
+ x: number;
1329
+ y: number;
1330
+ }
1331
+ export interface FaceAttributes {
1332
+ age?: number;
1333
+ gender?: string;
1334
+ emotion?: Record<string, number>;
1335
+ }
1336
+ export interface DetectedObject {
1337
+ name: string;
1338
+ confidence: number;
1339
+ boundingBox: BoundingBox;
1340
+ category?: string;
1341
+ }
1342
+ /**
1343
+ * Document analysis options
1344
+ */
1345
+ export interface AnalysisOptions {
1346
+ analyzeStructure?: boolean;
1347
+ analyzeContent?: boolean;
1348
+ analyzeStyle?: boolean;
1349
+ analyzeSecurity?: boolean;
1350
+ analyzeAccessibility?: boolean;
1351
+ analyzeQuality?: boolean;
1352
+ generateSummary?: boolean;
1353
+ extractKeywords?: boolean;
1354
+ extractEntities?: boolean;
1355
+ detectLanguage?: boolean;
1356
+ detectSentiment?: boolean;
1357
+ }
1358
+ /**
1359
+ * Document analysis result
1360
+ */
1361
+ export interface AnalysisResult {
1362
+ documentInfo: DocumentMetadata;
1363
+ structure?: StructureAnalysis;
1364
+ content?: ContentAnalysis;
1365
+ style?: StyleAnalysis;
1366
+ security?: SecurityAnalysis;
1367
+ accessibility?: AccessibilityAnalysis;
1368
+ quality?: QualityAnalysis;
1369
+ summary?: DocumentSummary;
1370
+ }
1371
+ export interface StructureAnalysis {
1372
+ sections: number;
1373
+ chapters: number;
1374
+ headings: HeadingInfo[];
1375
+ tableOfContents?: ExtractedBookmark[];
1376
+ pageBreaks: number;
1377
+ columns: number;
1378
+ }
1379
+ export interface HeadingInfo {
1380
+ level: number;
1381
+ text: string;
1382
+ pageNumber?: number;
1383
+ count?: number;
1384
+ }
1385
+ export interface ContentAnalysis {
1386
+ textStatistics: TextStatistics;
1387
+ keywords: KeywordInfo[];
1388
+ entities?: ExtractedEntity[];
1389
+ topics?: TopicInfo[];
1390
+ language?: LanguageInfo;
1391
+ sentiment?: SentimentInfo;
1392
+ }
1393
+ export interface KeywordInfo {
1394
+ keyword: string;
1395
+ frequency: number;
1396
+ relevance?: number;
1397
+ score?: number;
1398
+ positions?: number[];
1399
+ }
1400
+ export interface ExtractedEntity {
1401
+ text: string;
1402
+ type: 'person' | 'organization' | 'location' | 'date' | 'money' | 'email' | 'phone' | 'url' | 'custom';
1403
+ confidence: number;
1404
+ positions?: number[];
1405
+ metadata?: Record<string, unknown>;
1406
+ }
1407
+ export interface TopicInfo {
1408
+ topic: string;
1409
+ confidence: number;
1410
+ keywords: string[];
1411
+ }
1412
+ export interface LanguageInfo {
1413
+ detected: string;
1414
+ confidence: number;
1415
+ alternatives?: Array<{
1416
+ language: string;
1417
+ confidence: number;
1418
+ }>;
1419
+ }
1420
+ export interface SentimentInfo {
1421
+ overall: 'positive' | 'negative' | 'neutral' | 'mixed';
1422
+ score: number;
1423
+ magnitude: number;
1424
+ breakdown?: Record<string, SentimentScore>;
1425
+ }
1426
+ export interface SentimentScore {
1427
+ sentiment: 'positive' | 'negative' | 'neutral';
1428
+ score: number;
1429
+ }
1430
+ export interface StyleAnalysis {
1431
+ fonts: FontUsage[];
1432
+ colors: ColorUsage[];
1433
+ paragraphStyles: ParagraphStyleUsage[];
1434
+ consistency: ConsistencyReport;
1435
+ }
1436
+ export interface FontUsage {
1437
+ fontFamily: string;
1438
+ usageCount: number;
1439
+ sizes: number[];
1440
+ styles: string[];
1441
+ }
1442
+ export interface ColorUsage {
1443
+ color: string;
1444
+ hex: string;
1445
+ usageCount: number;
1446
+ usedFor: ('text' | 'background' | 'border' | 'highlight')[];
1447
+ }
1448
+ export interface ParagraphStyleUsage {
1449
+ name: string;
1450
+ usageCount: number;
1451
+ }
1452
+ export interface ConsistencyReport {
1453
+ isConsistent: boolean;
1454
+ issues: ConsistencyIssue[];
1455
+ score: number;
1456
+ }
1457
+ export interface ConsistencyIssue {
1458
+ type: 'font' | 'size' | 'spacing' | 'alignment' | 'color';
1459
+ description: string;
1460
+ locations: number[];
1461
+ severity: 'low' | 'medium' | 'high';
1462
+ }
1463
+ export interface SecurityAnalysis {
1464
+ isEncrypted: boolean;
1465
+ encryptionType?: string;
1466
+ hasPassword: boolean;
1467
+ permissions?: DocumentPermissions;
1468
+ signatures?: DigitalSignature[];
1469
+ macros?: MacroInfo[];
1470
+ externalLinks?: ExtractedLink[];
1471
+ risks: SecurityRisk[];
1472
+ }
1473
+ export interface DocumentPermissions {
1474
+ canPrint: boolean;
1475
+ canModify: boolean;
1476
+ canCopy: boolean;
1477
+ canAnnotate: boolean;
1478
+ canFillForms: boolean;
1479
+ canExtract: boolean;
1480
+ canAssemble: boolean;
1481
+ }
1482
+ export interface DigitalSignature {
1483
+ signer: string;
1484
+ signedAt: Date;
1485
+ isValid: boolean;
1486
+ certificateInfo?: CertificateInfo;
1487
+ }
1488
+ export interface CertificateInfo {
1489
+ issuer: string;
1490
+ subject: string;
1491
+ validFrom: Date;
1492
+ validTo: Date;
1493
+ serialNumber: string;
1494
+ }
1495
+ export interface MacroInfo {
1496
+ name: string;
1497
+ type: string;
1498
+ isSafe: boolean;
1499
+ code?: string;
1500
+ }
1501
+ export interface SecurityRisk {
1502
+ type: string;
1503
+ severity: 'low' | 'medium' | 'high' | 'critical';
1504
+ description: string;
1505
+ recommendation: string;
1506
+ }
1507
+ export interface AccessibilityAnalysis {
1508
+ score: number;
1509
+ isAccessible: boolean;
1510
+ issues: AccessibilityIssue[];
1511
+ hasAltText: boolean;
1512
+ hasTableHeaders: boolean;
1513
+ hasDocumentTitle: boolean;
1514
+ hasLanguage: boolean;
1515
+ headingStructure: 'correct' | 'incorrect' | 'missing';
1516
+ readingOrder: 'correct' | 'incorrect' | 'unknown';
1517
+ }
1518
+ export interface AccessibilityIssue {
1519
+ type: string;
1520
+ wcagCriteria?: string;
1521
+ severity: 'minor' | 'moderate' | 'serious' | 'critical';
1522
+ description: string;
1523
+ element?: string;
1524
+ pageNumber?: number;
1525
+ recommendation: string;
1526
+ }
1527
+ export interface QualityAnalysis {
1528
+ overallScore: number;
1529
+ imageQuality: ImageQualityReport;
1530
+ textQuality: TextQualityReport;
1531
+ formattingQuality: FormattingQualityReport;
1532
+ recommendations: QualityRecommendation[];
1533
+ }
1534
+ export interface ImageQualityReport {
1535
+ totalImages: number;
1536
+ lowResolutionCount: number;
1537
+ optimalResolutionCount: number;
1538
+ averageDpi: number;
1539
+ totalSize: number;
1540
+ recommendations: string[];
1541
+ }
1542
+ export interface TextQualityReport {
1543
+ spellingErrors: number;
1544
+ grammarIssues: number;
1545
+ readabilityScore: number;
1546
+ inconsistencies: string[];
1547
+ }
1548
+ export interface FormattingQualityReport {
1549
+ orphanLines: number;
1550
+ widowLines: number;
1551
+ overflowingText: number;
1552
+ inconsistentSpacing: number;
1553
+ }
1554
+ export interface QualityRecommendation {
1555
+ area: 'images' | 'text' | 'formatting' | 'structure';
1556
+ issue: string;
1557
+ recommendation: string;
1558
+ priority: 'low' | 'medium' | 'high';
1559
+ }
1560
+ export interface DocumentSummary {
1561
+ title: string;
1562
+ abstract: string;
1563
+ keyPoints: string[];
1564
+ wordCount: number;
1565
+ estimatedReadTime: number;
1566
+ }
1567
+ /**
1568
+ * Document comparison options
1569
+ */
1570
+ export interface ComparisonOptions {
1571
+ compareText?: boolean;
1572
+ compareFormatting?: boolean;
1573
+ compareImages?: boolean;
1574
+ compareTables?: boolean;
1575
+ compareMetadata?: boolean;
1576
+ ignoreWhitespace?: boolean;
1577
+ ignoreCase?: boolean;
1578
+ ignoreFormatting?: boolean;
1579
+ granularity?: 'character' | 'word' | 'sentence' | 'paragraph' | 'page';
1580
+ outputFormat?: 'detailed' | 'summary' | 'visual';
1581
+ }
1582
+ /**
1583
+ * Document comparison result
1584
+ */
1585
+ export interface ComparisonResult {
1586
+ areIdentical: boolean;
1587
+ similarityScore: number;
1588
+ differences: DocumentDifference[];
1589
+ additions: DocumentChange[];
1590
+ deletions: DocumentChange[];
1591
+ modifications: DocumentChange[];
1592
+ summary: ComparisonSummary;
1593
+ }
1594
+ export interface DocumentDifference {
1595
+ type: 'addition' | 'deletion' | 'modification' | 'move' | 'format';
1596
+ location: {
1597
+ document: 'source' | 'target';
1598
+ pageNumber?: number;
1599
+ position?: number;
1600
+ path?: string;
1601
+ };
1602
+ content: {
1603
+ original?: string;
1604
+ modified?: string;
1605
+ };
1606
+ metadata?: Record<string, unknown>;
1607
+ }
1608
+ export interface DocumentChange {
1609
+ type: 'text' | 'image' | 'table' | 'style' | 'metadata';
1610
+ content: string;
1611
+ location: string;
1612
+ details?: Record<string, unknown>;
1613
+ }
1614
+ export interface ComparisonSummary {
1615
+ totalChanges: number;
1616
+ addedCharacters: number;
1617
+ deletedCharacters: number;
1618
+ addedWords: number;
1619
+ deletedWords: number;
1620
+ modifiedParagraphs: number;
1621
+ addedImages: number;
1622
+ deletedImages: number;
1623
+ modifiedTables: number;
1624
+ }
1625
+ /**
1626
+ * Search options
1627
+ */
1628
+ export interface SearchOptions {
1629
+ query?: string;
1630
+ caseSensitive?: boolean;
1631
+ wholeWord?: boolean;
1632
+ regex?: boolean;
1633
+ useRegex?: boolean;
1634
+ fuzzy?: boolean;
1635
+ fuzzyMatch?: boolean;
1636
+ fuzzyThreshold?: number;
1637
+ maxResults?: number;
1638
+ highlightMatches?: boolean;
1639
+ includeContext?: boolean;
1640
+ contextLength?: number;
1641
+ highlightTag?: string;
1642
+ searchIn?: ('text' | 'metadata' | 'comments' | 'annotations')[];
1643
+ searchFields?: string[];
1644
+ fileTypes?: FileFormat[];
1645
+ dateRange?: {
1646
+ from?: Date;
1647
+ to?: Date;
1648
+ };
1649
+ sortBy?: 'relevance' | 'date' | 'name' | 'size' | 'position' | 'document';
1650
+ sortOrder?: 'asc' | 'desc' | 'ascending' | 'descending';
1651
+ }
1652
+ /**
1653
+ * Search result
1654
+ */
1655
+ export interface SearchResult {
1656
+ query?: string;
1657
+ totalMatches?: number;
1658
+ totalHits?: number;
1659
+ hits?: Array<{
1660
+ id: string;
1661
+ documentId: string;
1662
+ documentName: string;
1663
+ matchedText: string;
1664
+ context: string;
1665
+ startPosition: number;
1666
+ endPosition: number;
1667
+ lineNumber?: number;
1668
+ pageNumber?: number;
1669
+ section?: string;
1670
+ score: number;
1671
+ highlightedSnippet: string;
1672
+ }>;
1673
+ documents?: DocumentSearchResult[];
1674
+ facets?: SearchFacets | Record<string, Array<{
1675
+ value: string;
1676
+ count: number;
1677
+ }>>;
1678
+ suggestions?: string[];
1679
+ duration?: number;
1680
+ searchDuration?: number;
1681
+ }
1682
+ export interface DocumentSearchResult {
1683
+ file: string;
1684
+ format: FileFormat;
1685
+ matches: SearchMatch[];
1686
+ score: number;
1687
+ highlights?: string[];
1688
+ metadata?: DocumentMetadata;
1689
+ }
1690
+ export interface SearchMatch {
1691
+ content: string;
1692
+ context: string;
1693
+ position: {
1694
+ start: number;
1695
+ end: number;
1696
+ pageNumber?: number;
1697
+ lineNumber?: number;
1698
+ };
1699
+ score: number;
1700
+ type: 'text' | 'metadata' | 'comment' | 'annotation';
1701
+ }
1702
+ export interface SearchFacets {
1703
+ formats: Record<string, number>;
1704
+ dates: Record<string, number>;
1705
+ authors: Record<string, number>;
1706
+ keywords: Record<string, number>;
1707
+ }
1708
+ /**
1709
+ * Index options for search optimization. All fields are optional; defaults are not visible in this declaration.
1710
+ */
1711
+ export interface IndexOptions {
1712
+ fields?: string[];
1713
+ stemming?: boolean;
1714
+ stopWords?: string[] | boolean; // accepts an explicit word list or a boolean; NOTE(review): presumably true selects a built-in list - confirm
1715
+ minWordLength?: number;
1716
+ maxWordLength?: number;
1717
+ boost?: Record<string, number>; // NOTE(review): keys are presumably field names with weight values - confirm
1718
+ analyzers?: Record<string, TextAnalyzer>; // NOTE(review): keys are presumably field names - confirm
1719
+ }
1720
+ export interface TextAnalyzer { // tokenizer choice plus a list of token filters
1721
+ tokenizer: 'standard' | 'whitespace' | 'letter' | 'custom';
1722
+ filters: Array<'lowercase' | 'stopwords' | 'stemmer' | 'synonyms' | 'ngram'>; // NOTE(review): whether array order is the application order is not stated - confirm
1723
+ customTokenizer?: (text: string) => string[]; // optional in the type even when tokenizer is 'custom'; NOTE(review): presumably required in that case - confirm
1724
+ }
1725
+ /**
1726
+ * Batch job configuration. Every field is optional; several look like aliases (outputDir/outputDirectory, retries/retryAttempts) - NOTE(review): confirm precedence when both are set.
1727
+ */
1728
+ export interface BatchJobConfig {
1729
+ id?: string;
1730
+ name?: string;
1731
+ files?: Array<string | Buffer | { // each entry is a string, a Buffer, or a named data object
1732
+ data: InputDataType;
1733
+ name: string;
1734
+ }>;
1735
+ operation?: 'convert' | 'extract' | 'analyze' | 'compare' | 'search';
1736
+ options?: ConvertFileOptions | PDFExtractionOptions | AnalysisOptions | ComparisonOptions | SearchOptions; // the type does not tie the options shape to `operation`
1737
+ defaultOptions?: Record<string, unknown>;
1738
+ outputDir?: string;
1739
+ outputDirectory?: string;
1740
+ concurrency?: number;
1741
+ retries?: number;
1742
+ retryAttempts?: number;
1743
+ retryDelay?: number; // NOTE(review): time unit is not declared; presumably milliseconds - confirm
1744
+ timeout?: number; // NOTE(review): time unit and scope (per file vs whole job) are not declared - confirm
1745
+ continueOnError?: boolean;
1746
+ onProgress?: (progress: BatchProgress) => void;
1747
+ onFileComplete?: (result: BatchFileResult) => void;
1748
+ onError?: (error: Error, file: string) => void;
1749
+ }
1750
+ /**
1751
+ * Batch job status: the six string literals a batch job's status may take.
1752
+ */
1753
+ export type BatchJobStatus = 'pending' | 'processing' | 'paused' | 'completed' | 'failed' | 'cancelled';
1754
+ /**
1755
+ * Batch progress information, passed to BatchJobConfig.onProgress.
1756
+ */
1757
+ export interface BatchProgress {
1758
+ jobId: string;
1759
+ totalFiles: number;
1760
+ completedFiles: number;
1761
+ failedFiles: number;
1762
+ currentFile?: string;
1763
+ progress: number; // NOTE(review): scale (0-1 vs 0-100) is not declared here - confirm
1764
+ estimatedTimeRemaining?: number; // NOTE(review): time unit is not declared; presumably milliseconds - confirm
1765
+ startedAt: Date;
1766
+ status: 'pending' | 'running' | 'paused' | 'completed' | 'failed' | 'cancelled'; // NOTE(review): uses 'running' where BatchJobStatus uses 'processing'; the two unions are not assignable to each other - confirm this is intended
1767
+ }
1768
+ /**
1769
+ * Batch file result: outcome for a single file, passed to BatchJobConfig.onFileComplete.
1770
+ */
1771
+ export interface BatchFileResult {
1772
+ file: string;
1773
+ success: boolean;
1774
+ result?: ConversionResult | ExtractionResult | AnalysisResult; // optional; NOTE(review): presumably set only when success is true - confirm
1775
+ error?: Error; // optional; NOTE(review): presumably set only when success is false - confirm
1776
+ duration: number; // NOTE(review): time unit is not declared; presumably milliseconds - confirm
1777
+ outputPath?: string;
1778
+ }
1779
+ /**
1780
+ * Batch job result. Only jobId is required; the remaining fields come in alias-like pairs (files/items, count/items, duration/totalDuration, startedAt/startTime, completedAt/endTime) - NOTE(review): confirm which of each pair is populated.
1781
+ */
1782
+ export interface BatchJobResult {
1783
+ jobId: string;
1784
+ success?: boolean;
1785
+ status?: 'completed' | 'partial' | 'failed'; // a narrower union than BatchJobStatus, with an extra 'partial' value
1786
+ totalFiles?: number;
1787
+ totalItems?: number;
1788
+ successCount?: number;
1789
+ successfulItems?: number;
1790
+ failedCount?: number;
1791
+ failedItems?: number;
1792
+ results?: BatchFileResult[] | unknown[]; // the unknown[] arm means consumers must narrow each element before use
1793
+ errors?: Array<{ // one entry per failure, identified by file and/or itemId (both optional)
1794
+ file?: string;
1795
+ itemId?: string;
1796
+ error: string | Error;
1797
+ }>;
1798
+ duration?: number; // NOTE(review): time unit is not declared; presumably milliseconds - confirm
1799
+ totalDuration?: number;
1800
+ averageItemDuration?: number;
1801
+ startedAt?: Date;
1802
+ startTime?: Date;
1803
+ completedAt?: Date;
1804
+ endTime?: Date;
1805
+ }
1806
+ /**
1807
+ * Stream processing options. All fields are optional; defaults are not visible in this declaration.
1808
+ */
1809
+ export interface StreamProcessingOptions {
1810
+ chunkSize?: number; // NOTE(review): presumably bytes - confirm
1811
+ highWaterMark?: number;
1812
+ encoding?: BufferEncoding;
1813
+ onChunk?: (chunk: Buffer, index: number) => void | Promise<void>; // may be synchronous or return a Promise
1814
+ onProgress?: (bytesProcessed: number, totalBytes?: number) => void; // totalBytes may be omitted by the caller
1815
+ transform?: (chunk: Buffer) => Buffer | Promise<Buffer>; // may be synchronous or return a Promise
1816
+ filter?: (chunk: Buffer) => boolean; // synchronous only; NOTE(review): presumably true keeps the chunk - confirm
1817
+ maxMemory?: number; // NOTE(review): unit is not declared; presumably bytes - confirm
1818
+ tempDir?: string;
1819
+ pauseOnBackpressure?: boolean;
1820
+ emitProgress?: boolean;
1821
+ progressInterval?: number; // NOTE(review): unit (milliseconds vs bytes vs chunks) is not declared - confirm
1822
+ }
1823
+ /**
1824
+ * Stream result: a readable byte stream, descriptive metadata, and a function to abort it.
1825
+ */
1826
+ export interface StreamResult {
1827
+ stream: ReadableStream<Uint8Array>; // typed as the global ReadableStream of Uint8Array chunks
1828
+ metadata: { // format and mimeType are required; size and chunk count are optional
1829
+ totalSize?: number;
1830
+ chunkCount?: number;
1831
+ format: FileFormat;
1832
+ mimeType: string;
1833
+ };
1834
+ abort: () => void;
1835
+ }
1836
+ /**
1837
+ * Extractor plugin interface: a named, versioned extractor declaring the formats it supports.
1838
+ */
1839
+ export interface ExtractorPlugin {
1840
+ name: string;
1841
+ version: string;
1842
+ supportedFormats: FileFormat[];
1843
+ extract(data: InputDataType, options: Record<string, unknown>): Promise<ExtractionResult>; // required; asynchronous
1844
+ canExtract?(data: InputDataType): boolean; // optional synchronous check on the input data
1845
+ }
1846
+ /**
1847
+ * Analyzer plugin interface: a named, versioned analyzer declaring the formats it supports.
1848
+ */
1849
+ export interface AnalyzerPlugin {
1850
+ name: string;
1851
+ version: string;
1852
+ supportedFormats: FileFormat[];
1853
+ analyze(data: InputDataType, options: AnalysisOptions): Promise<AnalysisResult>; // required; asynchronous
1854
+ }
1855
+ /**
1856
+ * Processor plugin interface (for transformations): takes a Buffer and resolves to a Buffer.
1857
+ */
1858
+ export interface ProcessorPlugin {
1859
+ name: string;
1860
+ version: string;
1861
+ supportedFormats: FileFormat[];
1862
+ process(data: Buffer, options: Record<string, unknown>): Promise<Buffer>; // accepts only Buffer, unlike the extractor/analyzer plugins which take InputDataType
1863
+ priority?: number; // NOTE(review): ordering direction (higher first vs lower first) is not declared here - confirm
1864
+ }
1865
+ /**
1866
+ * Hook types for extensibility. All hooks are optional; each before/after hook resolves to the same type as its first argument.
1867
+ */
1868
+ export interface ConvertitHooks {
1869
+ beforeExtract?: (data: InputDataType, options: Record<string, unknown>) => Promise<InputDataType>;
1870
+ afterExtract?: <T>(result: ExtractionResult<T>) => Promise<ExtractionResult<T>>; // generic so the result's type parameter is preserved
1871
+ beforeAnalyze?: (data: InputDataType, options: AnalysisOptions) => Promise<InputDataType>;
1872
+ afterAnalyze?: (result: AnalysisResult) => Promise<AnalysisResult>;
1873
+ beforeBatch?: (config: BatchJobConfig) => Promise<BatchJobConfig>;
1874
+ afterBatch?: (result: BatchJobResult) => Promise<BatchJobResult>;
1875
+ onError?: (error: Error, context: Record<string, unknown>) => void; // the only hook typed as synchronous with no return value
1876
+ }
1877
+ /**
1878
+ * Extractor configuration. All fields are optional; defaults are not visible in this declaration.
1879
+ */
1880
+ export interface ExtractorConfig {
1881
+ tempDir?: string;
1882
+ maxFileSize?: number; // NOTE(review): unit is not declared; presumably bytes - confirm
1883
+ timeout?: number; // NOTE(review): time unit is not declared; presumably milliseconds - confirm
1884
+ cache?: boolean;
1885
+ cacheDir?: string;
1886
+ cacheTTL?: number; // NOTE(review): time unit (seconds vs milliseconds) is not declared - confirm
1887
+ verbose?: boolean;
1888
+ plugins?: ExtractorPlugin[];
1889
+ }
1890
+ /**
1891
+ * Universal extraction options: one optional options object per format, plus format-detection settings.
1892
+ */
1893
+ export interface UniversalExtractionOptions {
1894
+ pdf?: PDFExtractionOptions;
1895
+ word?: WordExtractionOptions;
1896
+ excel?: ExcelExtractionOptions;
1897
+ csv?: CSVExtractionOptions;
1898
+ image?: ImageExtractionOptions;
1899
+ autoDetect?: boolean;
1900
+ fallbackFormat?: FileFormat; // NOTE(review): presumably used when detection fails - confirm
1901
+ }
702
1902
  //# sourceMappingURL=types.d.ts.map