convertit 1.0.5 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +221 -6
- package/dist/analysis/index.d.ts +38 -0
- package/dist/analysis/index.d.ts.map +1 -0
- package/dist/batch/index.d.ts +116 -0
- package/dist/batch/index.d.ts.map +1 -0
- package/dist/converters/excel-styles.d.ts.map +1 -1
- package/dist/converters/index.d.ts +10 -6
- package/dist/converters/index.d.ts.map +1 -1
- package/dist/converters/pdf.d.ts.map +1 -1
- package/dist/core/converter.d.ts +54 -2
- package/dist/core/converter.d.ts.map +1 -1
- package/dist/core/errors.d.ts +5 -0
- package/dist/core/errors.d.ts.map +1 -1
- package/dist/core/types.d.ts +1201 -1
- package/dist/core/types.d.ts.map +1 -1
- package/dist/extractors/base.d.ts +151 -0
- package/dist/extractors/base.d.ts.map +1 -0
- package/dist/extractors/csv.d.ts +89 -0
- package/dist/extractors/csv.d.ts.map +1 -0
- package/dist/extractors/excel.d.ts +102 -0
- package/dist/extractors/excel.d.ts.map +1 -0
- package/dist/extractors/image.d.ts +94 -0
- package/dist/extractors/image.d.ts.map +1 -0
- package/dist/extractors/index.d.ts +16 -0
- package/dist/extractors/index.d.ts.map +1 -0
- package/dist/extractors/pdf.d.ts +89 -0
- package/dist/extractors/pdf.d.ts.map +1 -0
- package/dist/extractors/word.d.ts +83 -0
- package/dist/extractors/word.d.ts.map +1 -0
- package/dist/index.d.ts +8 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +84776 -80182
- package/dist/search/index.d.ts +105 -0
- package/dist/search/index.d.ts.map +1 -0
- package/dist/streaming/index.d.ts +94 -0
- package/dist/streaming/index.d.ts.map +1 -0
- package/dist/utils/helpers.d.ts.map +1 -1
- package/package.json +41 -3
package/dist/core/types.d.ts
CHANGED
|
@@ -311,7 +311,7 @@ export interface CellStyle {
|
|
|
311
311
|
numFmt?: string;
|
|
312
312
|
}
|
|
313
313
|
export interface BorderStyle {
|
|
314
|
-
style?: 'thin' | 'medium' | 'thick' | 'dotted' | 'dashed' | 'double' | 'hair' | 'mediumDashed' | 'dashDot' | 'mediumDashDot' | 'dashDotDot' | 'slantDashDot';
|
|
314
|
+
style?: 'thin' | 'medium' | 'thick' | 'dotted' | 'dashed' | 'double' | 'hair' | 'mediumDashed' | 'dashDot' | 'mediumDashDot' | 'dashDotDot' | 'slantDashDot' | 'mediumDashDotDot';
|
|
315
315
|
color?: string;
|
|
316
316
|
}
|
|
317
317
|
export type ComparisonOperator = 'equal' | 'notEqual' | 'greaterThan' | 'lessThan' | 'greaterThanOrEqual' | 'lessThanOrEqual' | 'contains' | 'notContains' | 'startsWith' | 'endsWith' | 'isEmpty' | 'isNotEmpty';
|
|
@@ -699,4 +699,1204 @@ export declare const MIME_TYPES: Record<FileFormat, string>;
|
|
|
699
699
|
export declare const DEFAULT_PAGE_SIZES: Record<string, PageSize>;
|
|
700
700
|
export declare const DEFAULT_MARGINS: PageMargins;
|
|
701
701
|
export declare const DEFAULT_FONT: FontConfig;
|
|
702
|
+
/**
|
|
703
|
+
* Base extraction result interface
|
|
704
|
+
*/
|
|
705
|
+
export interface ExtractionResult<T = unknown> {
|
|
706
|
+
success: boolean;
|
|
707
|
+
data: T;
|
|
708
|
+
format: FileFormat;
|
|
709
|
+
sourceFile?: string;
|
|
710
|
+
metadata: DocumentMetadata;
|
|
711
|
+
duration: number;
|
|
712
|
+
warnings?: string[];
|
|
713
|
+
errors?: string[];
|
|
714
|
+
}
|
|
715
|
+
/**
|
|
716
|
+
* Document metadata extracted from files
|
|
717
|
+
*/
|
|
718
|
+
export interface DocumentMetadata {
|
|
719
|
+
title?: string;
|
|
720
|
+
author?: string;
|
|
721
|
+
creator?: string;
|
|
722
|
+
producer?: string;
|
|
723
|
+
subject?: string;
|
|
724
|
+
keywords?: string[];
|
|
725
|
+
creationDate?: Date;
|
|
726
|
+
modificationDate?: Date;
|
|
727
|
+
pageCount?: number;
|
|
728
|
+
wordCount?: number;
|
|
729
|
+
characterCount?: number;
|
|
730
|
+
language?: string;
|
|
731
|
+
encrypted?: boolean;
|
|
732
|
+
fileSize?: number;
|
|
733
|
+
version?: string;
|
|
734
|
+
customProperties?: Record<string, unknown>;
|
|
735
|
+
}
|
|
736
|
+
/**
|
|
737
|
+
* Extracted text content with positioning
|
|
738
|
+
*/
|
|
739
|
+
export interface ExtractedText {
|
|
740
|
+
content: string;
|
|
741
|
+
pages?: PageText[];
|
|
742
|
+
paragraphs?: TextBlock[];
|
|
743
|
+
lines?: TextLine[];
|
|
744
|
+
words?: TextWord[];
|
|
745
|
+
statistics: TextStatistics;
|
|
746
|
+
}
|
|
747
|
+
export interface PageText {
|
|
748
|
+
pageNumber: number;
|
|
749
|
+
content: string;
|
|
750
|
+
paragraphs: TextBlock[];
|
|
751
|
+
lines: TextLine[];
|
|
752
|
+
boundingBox?: BoundingBox;
|
|
753
|
+
}
|
|
754
|
+
export interface TextBlock {
|
|
755
|
+
id: string;
|
|
756
|
+
content: string;
|
|
757
|
+
type: 'paragraph' | 'heading' | 'list' | 'code' | 'quote' | 'footnote';
|
|
758
|
+
level?: number;
|
|
759
|
+
style?: TextStyle;
|
|
760
|
+
boundingBox?: BoundingBox;
|
|
761
|
+
pageNumber?: number;
|
|
762
|
+
}
|
|
763
|
+
export interface TextLine {
|
|
764
|
+
content: string;
|
|
765
|
+
lineNumber: number;
|
|
766
|
+
boundingBox?: BoundingBox;
|
|
767
|
+
confidence?: number;
|
|
768
|
+
}
|
|
769
|
+
export interface TextWord {
|
|
770
|
+
content: string;
|
|
771
|
+
boundingBox?: BoundingBox;
|
|
772
|
+
confidence?: number;
|
|
773
|
+
fontName?: string;
|
|
774
|
+
fontSize?: number;
|
|
775
|
+
}
|
|
776
|
+
export interface TextStyle {
|
|
777
|
+
fontFamily?: string;
|
|
778
|
+
fontSize?: number;
|
|
779
|
+
fontWeight?: 'normal' | 'bold';
|
|
780
|
+
fontStyle?: 'normal' | 'italic';
|
|
781
|
+
textDecoration?: 'none' | 'underline' | 'strikethrough';
|
|
782
|
+
color?: string;
|
|
783
|
+
backgroundColor?: string;
|
|
784
|
+
alignment?: 'left' | 'center' | 'right' | 'justify';
|
|
785
|
+
lineHeight?: number;
|
|
786
|
+
letterSpacing?: number;
|
|
787
|
+
}
|
|
788
|
+
export interface TextStatistics {
|
|
789
|
+
totalCharacters: number;
|
|
790
|
+
totalWords: number;
|
|
791
|
+
totalSentences: number;
|
|
792
|
+
totalParagraphs: number;
|
|
793
|
+
totalPages: number;
|
|
794
|
+
averageWordsPerSentence: number;
|
|
795
|
+
averageCharactersPerWord: number;
|
|
796
|
+
readingTimeMinutes: number;
|
|
797
|
+
speakingTimeMinutes: number;
|
|
798
|
+
readabilityScores?: ReadabilityScores;
|
|
799
|
+
}
|
|
800
|
+
export interface ReadabilityScores {
|
|
801
|
+
fleschKincaidGrade?: number;
|
|
802
|
+
fleschReadingEase?: number;
|
|
803
|
+
gunningFog?: number;
|
|
804
|
+
colemanLiau?: number;
|
|
805
|
+
automatedReadabilityIndex?: number;
|
|
806
|
+
smogIndex?: number;
|
|
807
|
+
}
|
|
808
|
+
/**
|
|
809
|
+
* Bounding box for positioned elements
|
|
810
|
+
*/
|
|
811
|
+
export interface BoundingBox {
|
|
812
|
+
x: number;
|
|
813
|
+
y: number;
|
|
814
|
+
width: number;
|
|
815
|
+
height: number;
|
|
816
|
+
pageNumber?: number;
|
|
817
|
+
}
|
|
818
|
+
/**
|
|
819
|
+
* Extracted image from document
|
|
820
|
+
*/
|
|
821
|
+
export interface ExtractedImage {
|
|
822
|
+
id: string;
|
|
823
|
+
data: Buffer;
|
|
824
|
+
format: 'png' | 'jpg' | 'jpeg' | 'gif' | 'bmp' | 'tiff' | 'webp' | 'svg';
|
|
825
|
+
width: number;
|
|
826
|
+
height: number;
|
|
827
|
+
dpi?: number;
|
|
828
|
+
colorSpace?: 'rgb' | 'cmyk' | 'grayscale';
|
|
829
|
+
bitDepth?: number;
|
|
830
|
+
pageNumber?: number;
|
|
831
|
+
boundingBox?: BoundingBox;
|
|
832
|
+
altText?: string;
|
|
833
|
+
caption?: string;
|
|
834
|
+
metadata?: ImageMetadata;
|
|
835
|
+
}
|
|
836
|
+
export interface ImageMetadata {
|
|
837
|
+
exif?: ExifData;
|
|
838
|
+
iptc?: IptcData;
|
|
839
|
+
xmp?: Record<string, unknown>;
|
|
840
|
+
icc?: IccProfile;
|
|
841
|
+
}
|
|
842
|
+
export interface ExifData {
|
|
843
|
+
make?: string;
|
|
844
|
+
model?: string;
|
|
845
|
+
dateTime?: Date;
|
|
846
|
+
exposureTime?: string;
|
|
847
|
+
fNumber?: number;
|
|
848
|
+
iso?: number;
|
|
849
|
+
focalLength?: number;
|
|
850
|
+
gpsLatitude?: number;
|
|
851
|
+
gpsLongitude?: number;
|
|
852
|
+
orientation?: number;
|
|
853
|
+
software?: string;
|
|
854
|
+
[key: string]: unknown;
|
|
855
|
+
}
|
|
856
|
+
export interface IptcData {
|
|
857
|
+
title?: string;
|
|
858
|
+
description?: string;
|
|
859
|
+
keywords?: string[];
|
|
860
|
+
copyright?: string;
|
|
861
|
+
creator?: string;
|
|
862
|
+
city?: string;
|
|
863
|
+
country?: string;
|
|
864
|
+
[key: string]: unknown;
|
|
865
|
+
}
|
|
866
|
+
export interface IccProfile {
|
|
867
|
+
description?: string;
|
|
868
|
+
colorSpace?: string;
|
|
869
|
+
profileClass?: string;
|
|
870
|
+
}
|
|
871
|
+
/**
|
|
872
|
+
* Extracted table from document
|
|
873
|
+
*/
|
|
874
|
+
export interface ExtractedTable {
|
|
875
|
+
id: string;
|
|
876
|
+
headers?: string[];
|
|
877
|
+
rows: TableRow[];
|
|
878
|
+
columnCount: number;
|
|
879
|
+
rowCount: number;
|
|
880
|
+
pageNumber?: number;
|
|
881
|
+
boundingBox?: BoundingBox;
|
|
882
|
+
style?: TableStyle;
|
|
883
|
+
name?: string;
|
|
884
|
+
summary?: string;
|
|
885
|
+
}
|
|
886
|
+
export interface TableRow {
|
|
887
|
+
cells: TableCell[];
|
|
888
|
+
isHeader?: boolean;
|
|
889
|
+
rowIndex: number;
|
|
890
|
+
}
|
|
891
|
+
export interface TableCell {
|
|
892
|
+
content: string;
|
|
893
|
+
value?: unknown;
|
|
894
|
+
rowSpan?: number;
|
|
895
|
+
colSpan?: number;
|
|
896
|
+
columnIndex: number;
|
|
897
|
+
style?: CellStyle;
|
|
898
|
+
formula?: string;
|
|
899
|
+
dataType?: 'string' | 'number' | 'date' | 'boolean' | 'formula' | 'error' | 'empty';
|
|
900
|
+
}
|
|
901
|
+
export interface TableStyle {
|
|
902
|
+
borderCollapse?: 'collapse' | 'separate';
|
|
903
|
+
borderColor?: string;
|
|
904
|
+
headerBackground?: string;
|
|
905
|
+
alternateRowColors?: boolean;
|
|
906
|
+
}
|
|
907
|
+
/**
|
|
908
|
+
* Extracted hyperlink
|
|
909
|
+
*/
|
|
910
|
+
export interface ExtractedLink {
|
|
911
|
+
id: string;
|
|
912
|
+
text: string;
|
|
913
|
+
url: string;
|
|
914
|
+
type: 'external' | 'internal' | 'email' | 'phone' | 'anchor';
|
|
915
|
+
pageNumber?: number;
|
|
916
|
+
boundingBox?: BoundingBox;
|
|
917
|
+
isValid?: boolean;
|
|
918
|
+
}
|
|
919
|
+
/**
|
|
920
|
+
* Extracted annotation/comment
|
|
921
|
+
*/
|
|
922
|
+
export interface ExtractedAnnotation {
|
|
923
|
+
id: string;
|
|
924
|
+
type: 'highlight' | 'underline' | 'strikeout' | 'note' | 'comment' | 'freeText' | 'stamp' | 'drawing';
|
|
925
|
+
content?: string;
|
|
926
|
+
author?: string;
|
|
927
|
+
createdAt?: Date;
|
|
928
|
+
modifiedAt?: Date;
|
|
929
|
+
color?: string;
|
|
930
|
+
pageNumber?: number;
|
|
931
|
+
boundingBox?: BoundingBox;
|
|
932
|
+
replies?: ExtractedAnnotation[];
|
|
933
|
+
status?: 'open' | 'resolved' | 'accepted' | 'rejected';
|
|
934
|
+
}
|
|
935
|
+
/**
|
|
936
|
+
* Extracted form field
|
|
937
|
+
*/
|
|
938
|
+
export interface ExtractedFormField {
|
|
939
|
+
id: string;
|
|
940
|
+
name: string;
|
|
941
|
+
type: 'text' | 'checkbox' | 'radio' | 'select' | 'button' | 'signature' | 'date' | 'number';
|
|
942
|
+
value?: unknown;
|
|
943
|
+
options?: string[];
|
|
944
|
+
required?: boolean;
|
|
945
|
+
readOnly?: boolean;
|
|
946
|
+
maxLength?: number;
|
|
947
|
+
pageNumber?: number;
|
|
948
|
+
boundingBox?: BoundingBox;
|
|
949
|
+
validation?: FormFieldValidation;
|
|
950
|
+
}
|
|
951
|
+
export interface FormFieldValidation {
|
|
952
|
+
type?: 'none' | 'email' | 'phone' | 'url' | 'number' | 'date' | 'regex';
|
|
953
|
+
pattern?: string;
|
|
954
|
+
min?: number | Date;
|
|
955
|
+
max?: number | Date;
|
|
956
|
+
errorMessage?: string;
|
|
957
|
+
}
|
|
958
|
+
/**
|
|
959
|
+
* Extracted bookmark/outline
|
|
960
|
+
*/
|
|
961
|
+
export interface ExtractedBookmark {
|
|
962
|
+
id: string;
|
|
963
|
+
title: string;
|
|
964
|
+
pageNumber?: number;
|
|
965
|
+
destination?: string;
|
|
966
|
+
level: number;
|
|
967
|
+
children?: ExtractedBookmark[];
|
|
968
|
+
color?: string;
|
|
969
|
+
isOpen?: boolean;
|
|
970
|
+
}
|
|
971
|
+
/**
|
|
972
|
+
* Extracted attachment/embedded file
|
|
973
|
+
*/
|
|
974
|
+
export interface ExtractedAttachment {
|
|
975
|
+
id: string;
|
|
976
|
+
filename: string;
|
|
977
|
+
data: Buffer;
|
|
978
|
+
mimeType: string;
|
|
979
|
+
size: number;
|
|
980
|
+
description?: string;
|
|
981
|
+
createdAt?: Date;
|
|
982
|
+
modifiedAt?: Date;
|
|
983
|
+
checksum?: string;
|
|
984
|
+
}
|
|
985
|
+
/**
|
|
986
|
+
* General extraction options (base interface)
|
|
987
|
+
*/
|
|
988
|
+
export interface ExtractionOptions {
|
|
989
|
+
extractText?: boolean;
|
|
990
|
+
extractImages?: boolean;
|
|
991
|
+
extractTables?: boolean;
|
|
992
|
+
extractMetadata?: boolean;
|
|
993
|
+
preserveFormatting?: boolean;
|
|
994
|
+
[key: string]: unknown;
|
|
995
|
+
}
|
|
996
|
+
/**
|
|
997
|
+
* PDF-specific extraction options
|
|
998
|
+
*/
|
|
999
|
+
export interface PDFExtractionOptions {
|
|
1000
|
+
extractText?: boolean;
|
|
1001
|
+
extractImages?: boolean;
|
|
1002
|
+
extractTables?: boolean;
|
|
1003
|
+
extractLinks?: boolean;
|
|
1004
|
+
extractAnnotations?: boolean;
|
|
1005
|
+
extractForms?: boolean;
|
|
1006
|
+
extractBookmarks?: boolean;
|
|
1007
|
+
extractAttachments?: boolean;
|
|
1008
|
+
extractMetadata?: boolean;
|
|
1009
|
+
pages?: number[] | 'all';
|
|
1010
|
+
preserveLayout?: boolean;
|
|
1011
|
+
ocrIfNeeded?: boolean;
|
|
1012
|
+
ocrLanguage?: string | string[];
|
|
1013
|
+
password?: string;
|
|
1014
|
+
imageFormat?: 'png' | 'jpg' | 'webp';
|
|
1015
|
+
imageQuality?: number;
|
|
1016
|
+
minImageSize?: number;
|
|
1017
|
+
}
|
|
1018
|
+
/**
|
|
1019
|
+
* PDF extraction result
|
|
1020
|
+
*/
|
|
1021
|
+
export interface PDFExtractionResult extends ExtractionResult {
|
|
1022
|
+
data: {
|
|
1023
|
+
text?: ExtractedText;
|
|
1024
|
+
images?: ExtractedImage[];
|
|
1025
|
+
tables?: ExtractedTable[];
|
|
1026
|
+
links?: ExtractedLink[];
|
|
1027
|
+
annotations?: ExtractedAnnotation[];
|
|
1028
|
+
formFields?: ExtractedFormField[];
|
|
1029
|
+
bookmarks?: ExtractedBookmark[];
|
|
1030
|
+
attachments?: ExtractedAttachment[];
|
|
1031
|
+
};
|
|
1032
|
+
}
|
|
1033
|
+
/**
|
|
1034
|
+
* Word-specific extraction options
|
|
1035
|
+
*/
|
|
1036
|
+
export interface WordExtractionOptions {
|
|
1037
|
+
extractText?: boolean;
|
|
1038
|
+
extractImages?: boolean;
|
|
1039
|
+
extractTables?: boolean;
|
|
1040
|
+
extractStyles?: boolean;
|
|
1041
|
+
extractComments?: boolean;
|
|
1042
|
+
extractHeaders?: boolean;
|
|
1043
|
+
extractFooters?: boolean;
|
|
1044
|
+
extractFootnotes?: boolean;
|
|
1045
|
+
extractEndnotes?: boolean;
|
|
1046
|
+
extractBookmarks?: boolean;
|
|
1047
|
+
extractMetadata?: boolean;
|
|
1048
|
+
preserveFormatting?: boolean;
|
|
1049
|
+
includeTrackedChanges?: boolean;
|
|
1050
|
+
}
|
|
1051
|
+
/**
|
|
1052
|
+
* Word extraction result
|
|
1053
|
+
*/
|
|
1054
|
+
export interface WordExtractionResult extends ExtractionResult {
|
|
1055
|
+
data: {
|
|
1056
|
+
text?: ExtractedText;
|
|
1057
|
+
images?: ExtractedImage[];
|
|
1058
|
+
tables?: ExtractedTable[];
|
|
1059
|
+
styles?: ExtractedStyle[];
|
|
1060
|
+
comments?: ExtractedComment[];
|
|
1061
|
+
headers?: ExtractedHeaderFooter[];
|
|
1062
|
+
footers?: ExtractedHeaderFooter[];
|
|
1063
|
+
footnotes?: ExtractedNote[];
|
|
1064
|
+
endnotes?: ExtractedNote[];
|
|
1065
|
+
bookmarks?: ExtractedBookmark[];
|
|
1066
|
+
sections?: DocumentSection[];
|
|
1067
|
+
};
|
|
1068
|
+
}
|
|
1069
|
+
export interface ExtractedStyle {
|
|
1070
|
+
id: string;
|
|
1071
|
+
name: string;
|
|
1072
|
+
type: 'paragraph' | 'character' | 'table' | 'list';
|
|
1073
|
+
basedOn?: string;
|
|
1074
|
+
font?: Partial<FontConfig>;
|
|
1075
|
+
paragraph?: {
|
|
1076
|
+
alignment?: 'left' | 'center' | 'right' | 'justify';
|
|
1077
|
+
spacing?: {
|
|
1078
|
+
before?: number;
|
|
1079
|
+
after?: number;
|
|
1080
|
+
line?: number;
|
|
1081
|
+
};
|
|
1082
|
+
indent?: {
|
|
1083
|
+
left?: number;
|
|
1084
|
+
right?: number;
|
|
1085
|
+
firstLine?: number;
|
|
1086
|
+
};
|
|
1087
|
+
};
|
|
1088
|
+
}
|
|
1089
|
+
export interface ExtractedComment {
|
|
1090
|
+
id: string;
|
|
1091
|
+
author: string;
|
|
1092
|
+
content: string;
|
|
1093
|
+
createdAt?: Date;
|
|
1094
|
+
referencedText?: string;
|
|
1095
|
+
replies?: ExtractedComment[];
|
|
1096
|
+
resolved?: boolean;
|
|
1097
|
+
}
|
|
1098
|
+
export interface ExtractedHeaderFooter {
|
|
1099
|
+
type: 'header' | 'footer';
|
|
1100
|
+
section: 'first' | 'odd' | 'even' | 'default';
|
|
1101
|
+
content: string;
|
|
1102
|
+
images?: ExtractedImage[];
|
|
1103
|
+
}
|
|
1104
|
+
export interface ExtractedNote {
|
|
1105
|
+
id: string;
|
|
1106
|
+
type: 'footnote' | 'endnote';
|
|
1107
|
+
referenceNumber: number;
|
|
1108
|
+
content: string;
|
|
1109
|
+
}
|
|
1110
|
+
export interface DocumentSection {
|
|
1111
|
+
id: string;
|
|
1112
|
+
startPage: number;
|
|
1113
|
+
endPage: number;
|
|
1114
|
+
orientation: 'portrait' | 'landscape';
|
|
1115
|
+
pageSize: PageSize;
|
|
1116
|
+
margins: PageMargins;
|
|
1117
|
+
columns: number;
|
|
1118
|
+
}
|
|
1119
|
+
/**
|
|
1120
|
+
* Excel-specific extraction options
|
|
1121
|
+
*/
|
|
1122
|
+
export interface ExcelExtractionOptions {
|
|
1123
|
+
extractData?: boolean;
|
|
1124
|
+
extractFormulas?: boolean;
|
|
1125
|
+
extractStyles?: boolean;
|
|
1126
|
+
extractCharts?: boolean;
|
|
1127
|
+
extractImages?: boolean;
|
|
1128
|
+
extractComments?: boolean;
|
|
1129
|
+
extractNames?: boolean;
|
|
1130
|
+
extractValidation?: boolean;
|
|
1131
|
+
extractConditionalFormatting?: boolean;
|
|
1132
|
+
extractMetadata?: boolean;
|
|
1133
|
+
sheets?: string[] | number[] | 'all';
|
|
1134
|
+
includeHiddenSheets?: boolean;
|
|
1135
|
+
includeHiddenRows?: boolean;
|
|
1136
|
+
includeHiddenColumns?: boolean;
|
|
1137
|
+
evaluateFormulas?: boolean;
|
|
1138
|
+
dateFormat?: string;
|
|
1139
|
+
numberFormat?: string;
|
|
1140
|
+
password?: string;
|
|
1141
|
+
}
|
|
1142
|
+
/**
|
|
1143
|
+
* Excel extraction result
|
|
1144
|
+
*/
|
|
1145
|
+
export interface ExcelExtractionResult extends ExtractionResult {
|
|
1146
|
+
data: {
|
|
1147
|
+
sheets: ExtractedSheet[];
|
|
1148
|
+
charts?: ExtractedChart[];
|
|
1149
|
+
images?: ExtractedImage[];
|
|
1150
|
+
names?: ExtractedName[];
|
|
1151
|
+
styles?: ExtractedCellStyle[];
|
|
1152
|
+
};
|
|
1153
|
+
}
|
|
1154
|
+
export interface ExtractedSheet {
|
|
1155
|
+
id: string;
|
|
1156
|
+
name: string;
|
|
1157
|
+
index: number;
|
|
1158
|
+
isHidden: boolean;
|
|
1159
|
+
data: ExtractedTable;
|
|
1160
|
+
mergedCells?: MergedCell[];
|
|
1161
|
+
comments?: ExtractedCellComment[];
|
|
1162
|
+
conditionalFormats?: ExtractedConditionalFormat[];
|
|
1163
|
+
dataValidations?: ExtractedDataValidation[];
|
|
1164
|
+
freezePane?: {
|
|
1165
|
+
row: number;
|
|
1166
|
+
column: number;
|
|
1167
|
+
};
|
|
1168
|
+
autoFilter?: {
|
|
1169
|
+
range: string;
|
|
1170
|
+
};
|
|
1171
|
+
}
|
|
1172
|
+
export interface MergedCell {
|
|
1173
|
+
startRow: number;
|
|
1174
|
+
startColumn: number;
|
|
1175
|
+
endRow: number;
|
|
1176
|
+
endColumn: number;
|
|
1177
|
+
}
|
|
1178
|
+
export interface ExtractedCellComment {
|
|
1179
|
+
cell: string;
|
|
1180
|
+
author?: string;
|
|
1181
|
+
content: string;
|
|
1182
|
+
isResolved?: boolean;
|
|
1183
|
+
}
|
|
1184
|
+
export interface ExtractedConditionalFormat {
|
|
1185
|
+
range: string;
|
|
1186
|
+
type: string;
|
|
1187
|
+
priority: number;
|
|
1188
|
+
formula?: string;
|
|
1189
|
+
style?: CellStyle;
|
|
1190
|
+
}
|
|
1191
|
+
export interface ExtractedDataValidation {
|
|
1192
|
+
range: string;
|
|
1193
|
+
type: 'list' | 'whole' | 'decimal' | 'date' | 'time' | 'textLength' | 'custom';
|
|
1194
|
+
operator?: string;
|
|
1195
|
+
formula1?: string;
|
|
1196
|
+
formula2?: string;
|
|
1197
|
+
allowedValues?: unknown[];
|
|
1198
|
+
showDropdown?: boolean;
|
|
1199
|
+
showErrorMessage?: boolean;
|
|
1200
|
+
errorMessage?: string;
|
|
1201
|
+
}
|
|
1202
|
+
export interface ExtractedChart {
|
|
1203
|
+
id: string;
|
|
1204
|
+
name: string;
|
|
1205
|
+
type: string;
|
|
1206
|
+
title?: string;
|
|
1207
|
+
sheet: string;
|
|
1208
|
+
position: BoundingBox;
|
|
1209
|
+
data: {
|
|
1210
|
+
categories?: string[];
|
|
1211
|
+
series: ChartSeries[];
|
|
1212
|
+
};
|
|
1213
|
+
}
|
|
1214
|
+
export interface ChartSeries {
|
|
1215
|
+
name: string;
|
|
1216
|
+
values: number[];
|
|
1217
|
+
color?: string;
|
|
1218
|
+
}
|
|
1219
|
+
export interface ExtractedName {
|
|
1220
|
+
name: string;
|
|
1221
|
+
value: string;
|
|
1222
|
+
scope: string | 'workbook';
|
|
1223
|
+
comment?: string;
|
|
1224
|
+
}
|
|
1225
|
+
export interface ExtractedCellStyle {
|
|
1226
|
+
id: string;
|
|
1227
|
+
name?: string;
|
|
1228
|
+
font?: Partial<FontConfig>;
|
|
1229
|
+
fill?: {
|
|
1230
|
+
type: string;
|
|
1231
|
+
color?: string;
|
|
1232
|
+
};
|
|
1233
|
+
border?: Record<string, BorderStyle>;
|
|
1234
|
+
alignment?: Record<string, unknown>;
|
|
1235
|
+
numFmt?: string;
|
|
1236
|
+
}
|
|
1237
|
+
/**
|
|
1238
|
+
* CSV-specific extraction options
|
|
1239
|
+
*/
|
|
1240
|
+
export interface CSVExtractionOptions {
|
|
1241
|
+
delimiter?: string;
|
|
1242
|
+
quote?: string;
|
|
1243
|
+
escape?: string;
|
|
1244
|
+
hasHeaders?: boolean;
|
|
1245
|
+
encoding?: BufferEncoding;
|
|
1246
|
+
skipEmptyLines?: boolean;
|
|
1247
|
+
trimFields?: boolean;
|
|
1248
|
+
maxRows?: number;
|
|
1249
|
+
columns?: string[] | number[];
|
|
1250
|
+
transformValues?: boolean;
|
|
1251
|
+
}
|
|
1252
|
+
/**
|
|
1253
|
+
* CSV extraction result
|
|
1254
|
+
*/
|
|
1255
|
+
export interface CSVExtractionResult extends ExtractionResult {
|
|
1256
|
+
data: {
|
|
1257
|
+
headers?: string[];
|
|
1258
|
+
rows: unknown[][];
|
|
1259
|
+
records: Record<string, unknown>[];
|
|
1260
|
+
statistics: CSVStatistics;
|
|
1261
|
+
};
|
|
1262
|
+
}
|
|
1263
|
+
export interface CSVStatistics {
|
|
1264
|
+
rowCount: number;
|
|
1265
|
+
columnCount: number;
|
|
1266
|
+
emptyRowCount: number;
|
|
1267
|
+
duplicateRowCount: number;
|
|
1268
|
+
columnTypes: Record<string, 'string' | 'number' | 'date' | 'boolean' | 'mixed'>;
|
|
1269
|
+
nullCounts: Record<string, number>;
|
|
1270
|
+
uniqueCounts: Record<string, number>;
|
|
1271
|
+
}
|
|
1272
|
+
/**
|
|
1273
|
+
* Image-specific extraction options
|
|
1274
|
+
*/
|
|
1275
|
+
export interface ImageExtractionOptions {
|
|
1276
|
+
extractMetadata?: boolean;
|
|
1277
|
+
extractExif?: boolean;
|
|
1278
|
+
extractIptc?: boolean;
|
|
1279
|
+
extractXmp?: boolean;
|
|
1280
|
+
extractColors?: boolean;
|
|
1281
|
+
extractText?: boolean;
|
|
1282
|
+
ocrLanguage?: string | string[];
|
|
1283
|
+
colorCount?: number;
|
|
1284
|
+
analyzeFaces?: boolean;
|
|
1285
|
+
analyzeObjects?: boolean;
|
|
1286
|
+
}
|
|
1287
|
+
/**
|
|
1288
|
+
* Image extraction result
|
|
1289
|
+
*/
|
|
1290
|
+
export interface ImageExtractionResult extends ExtractionResult {
|
|
1291
|
+
data: {
|
|
1292
|
+
dimensions: {
|
|
1293
|
+
width: number;
|
|
1294
|
+
height: number;
|
|
1295
|
+
};
|
|
1296
|
+
format: string;
|
|
1297
|
+
colorSpace: string;
|
|
1298
|
+
bitDepth: number;
|
|
1299
|
+
hasAlpha: boolean;
|
|
1300
|
+
isAnimated: boolean;
|
|
1301
|
+
frameCount?: number;
|
|
1302
|
+
metadata?: ImageMetadata;
|
|
1303
|
+
dominantColors?: DominantColor[];
|
|
1304
|
+
text?: ExtractedText;
|
|
1305
|
+
faces?: DetectedFace[];
|
|
1306
|
+
objects?: DetectedObject[];
|
|
1307
|
+
};
|
|
1308
|
+
}
|
|
1309
|
+
export interface DominantColor {
|
|
1310
|
+
color: string;
|
|
1311
|
+
hex: string;
|
|
1312
|
+
rgb: {
|
|
1313
|
+
r: number;
|
|
1314
|
+
g: number;
|
|
1315
|
+
b: number;
|
|
1316
|
+
};
|
|
1317
|
+
percentage: number;
|
|
1318
|
+
name?: string;
|
|
1319
|
+
}
|
|
1320
|
+
export interface DetectedFace {
|
|
1321
|
+
boundingBox: BoundingBox;
|
|
1322
|
+
confidence: number;
|
|
1323
|
+
landmarks?: FaceLandmark[];
|
|
1324
|
+
attributes?: FaceAttributes;
|
|
1325
|
+
}
|
|
1326
|
+
export interface FaceLandmark {
|
|
1327
|
+
type: string;
|
|
1328
|
+
x: number;
|
|
1329
|
+
y: number;
|
|
1330
|
+
}
|
|
1331
|
+
export interface FaceAttributes {
|
|
1332
|
+
age?: number;
|
|
1333
|
+
gender?: string;
|
|
1334
|
+
emotion?: Record<string, number>;
|
|
1335
|
+
}
|
|
1336
|
+
export interface DetectedObject {
|
|
1337
|
+
name: string;
|
|
1338
|
+
confidence: number;
|
|
1339
|
+
boundingBox: BoundingBox;
|
|
1340
|
+
category?: string;
|
|
1341
|
+
}
|
|
1342
|
+
/**
|
|
1343
|
+
* Document analysis options
|
|
1344
|
+
*/
|
|
1345
|
+
export interface AnalysisOptions {
|
|
1346
|
+
analyzeStructure?: boolean;
|
|
1347
|
+
analyzeContent?: boolean;
|
|
1348
|
+
analyzeStyle?: boolean;
|
|
1349
|
+
analyzeSecurity?: boolean;
|
|
1350
|
+
analyzeAccessibility?: boolean;
|
|
1351
|
+
analyzeQuality?: boolean;
|
|
1352
|
+
generateSummary?: boolean;
|
|
1353
|
+
extractKeywords?: boolean;
|
|
1354
|
+
extractEntities?: boolean;
|
|
1355
|
+
detectLanguage?: boolean;
|
|
1356
|
+
detectSentiment?: boolean;
|
|
1357
|
+
}
|
|
1358
|
+
/**
|
|
1359
|
+
* Document analysis result
|
|
1360
|
+
*/
|
|
1361
|
+
export interface AnalysisResult {
|
|
1362
|
+
documentInfo: DocumentMetadata;
|
|
1363
|
+
structure?: StructureAnalysis;
|
|
1364
|
+
content?: ContentAnalysis;
|
|
1365
|
+
style?: StyleAnalysis;
|
|
1366
|
+
security?: SecurityAnalysis;
|
|
1367
|
+
accessibility?: AccessibilityAnalysis;
|
|
1368
|
+
quality?: QualityAnalysis;
|
|
1369
|
+
summary?: DocumentSummary;
|
|
1370
|
+
}
|
|
1371
|
+
export interface StructureAnalysis {
|
|
1372
|
+
sections: number;
|
|
1373
|
+
chapters: number;
|
|
1374
|
+
headings: HeadingInfo[];
|
|
1375
|
+
tableOfContents?: ExtractedBookmark[];
|
|
1376
|
+
pageBreaks: number;
|
|
1377
|
+
columns: number;
|
|
1378
|
+
}
|
|
1379
|
+
export interface HeadingInfo {
|
|
1380
|
+
level: number;
|
|
1381
|
+
text: string;
|
|
1382
|
+
pageNumber?: number;
|
|
1383
|
+
count?: number;
|
|
1384
|
+
}
|
|
1385
|
+
export interface ContentAnalysis {
|
|
1386
|
+
textStatistics: TextStatistics;
|
|
1387
|
+
keywords: KeywordInfo[];
|
|
1388
|
+
entities?: ExtractedEntity[];
|
|
1389
|
+
topics?: TopicInfo[];
|
|
1390
|
+
language?: LanguageInfo;
|
|
1391
|
+
sentiment?: SentimentInfo;
|
|
1392
|
+
}
|
|
1393
|
+
export interface KeywordInfo {
|
|
1394
|
+
keyword: string;
|
|
1395
|
+
frequency: number;
|
|
1396
|
+
relevance?: number;
|
|
1397
|
+
score?: number;
|
|
1398
|
+
positions?: number[];
|
|
1399
|
+
}
|
|
1400
|
+
export interface ExtractedEntity {
|
|
1401
|
+
text: string;
|
|
1402
|
+
type: 'person' | 'organization' | 'location' | 'date' | 'money' | 'email' | 'phone' | 'url' | 'custom';
|
|
1403
|
+
confidence: number;
|
|
1404
|
+
positions?: number[];
|
|
1405
|
+
metadata?: Record<string, unknown>;
|
|
1406
|
+
}
|
|
1407
|
+
export interface TopicInfo {
|
|
1408
|
+
topic: string;
|
|
1409
|
+
confidence: number;
|
|
1410
|
+
keywords: string[];
|
|
1411
|
+
}
|
|
1412
|
+
export interface LanguageInfo {
|
|
1413
|
+
detected: string;
|
|
1414
|
+
confidence: number;
|
|
1415
|
+
alternatives?: Array<{
|
|
1416
|
+
language: string;
|
|
1417
|
+
confidence: number;
|
|
1418
|
+
}>;
|
|
1419
|
+
}
|
|
1420
|
+
export interface SentimentInfo {
|
|
1421
|
+
overall: 'positive' | 'negative' | 'neutral' | 'mixed';
|
|
1422
|
+
score: number;
|
|
1423
|
+
magnitude: number;
|
|
1424
|
+
breakdown?: Record<string, SentimentScore>;
|
|
1425
|
+
}
|
|
1426
|
+
export interface SentimentScore {
|
|
1427
|
+
sentiment: 'positive' | 'negative' | 'neutral';
|
|
1428
|
+
score: number;
|
|
1429
|
+
}
|
|
1430
|
+
export interface StyleAnalysis {
|
|
1431
|
+
fonts: FontUsage[];
|
|
1432
|
+
colors: ColorUsage[];
|
|
1433
|
+
paragraphStyles: ParagraphStyleUsage[];
|
|
1434
|
+
consistency: ConsistencyReport;
|
|
1435
|
+
}
|
|
1436
|
+
export interface FontUsage {
|
|
1437
|
+
fontFamily: string;
|
|
1438
|
+
usageCount: number;
|
|
1439
|
+
sizes: number[];
|
|
1440
|
+
styles: string[];
|
|
1441
|
+
}
|
|
1442
|
+
export interface ColorUsage {
|
|
1443
|
+
color: string;
|
|
1444
|
+
hex: string;
|
|
1445
|
+
usageCount: number;
|
|
1446
|
+
usedFor: ('text' | 'background' | 'border' | 'highlight')[];
|
|
1447
|
+
}
|
|
1448
|
+
export interface ParagraphStyleUsage {
|
|
1449
|
+
name: string;
|
|
1450
|
+
usageCount: number;
|
|
1451
|
+
}
|
|
1452
|
+
export interface ConsistencyReport {
|
|
1453
|
+
isConsistent: boolean;
|
|
1454
|
+
issues: ConsistencyIssue[];
|
|
1455
|
+
score: number;
|
|
1456
|
+
}
|
|
1457
|
+
export interface ConsistencyIssue {
|
|
1458
|
+
type: 'font' | 'size' | 'spacing' | 'alignment' | 'color';
|
|
1459
|
+
description: string;
|
|
1460
|
+
locations: number[];
|
|
1461
|
+
severity: 'low' | 'medium' | 'high';
|
|
1462
|
+
}
|
|
1463
|
+
export interface SecurityAnalysis {
|
|
1464
|
+
isEncrypted: boolean;
|
|
1465
|
+
encryptionType?: string;
|
|
1466
|
+
hasPassword: boolean;
|
|
1467
|
+
permissions?: DocumentPermissions;
|
|
1468
|
+
signatures?: DigitalSignature[];
|
|
1469
|
+
macros?: MacroInfo[];
|
|
1470
|
+
externalLinks?: ExtractedLink[];
|
|
1471
|
+
risks: SecurityRisk[];
|
|
1472
|
+
}
|
|
1473
|
+
export interface DocumentPermissions {
|
|
1474
|
+
canPrint: boolean;
|
|
1475
|
+
canModify: boolean;
|
|
1476
|
+
canCopy: boolean;
|
|
1477
|
+
canAnnotate: boolean;
|
|
1478
|
+
canFillForms: boolean;
|
|
1479
|
+
canExtract: boolean;
|
|
1480
|
+
canAssemble: boolean;
|
|
1481
|
+
}
|
|
1482
|
+
export interface DigitalSignature {
|
|
1483
|
+
signer: string;
|
|
1484
|
+
signedAt: Date;
|
|
1485
|
+
isValid: boolean;
|
|
1486
|
+
certificateInfo?: CertificateInfo;
|
|
1487
|
+
}
|
|
1488
|
+
export interface CertificateInfo {
|
|
1489
|
+
issuer: string;
|
|
1490
|
+
subject: string;
|
|
1491
|
+
validFrom: Date;
|
|
1492
|
+
validTo: Date;
|
|
1493
|
+
serialNumber: string;
|
|
1494
|
+
}
|
|
1495
|
+
export interface MacroInfo {
|
|
1496
|
+
name: string;
|
|
1497
|
+
type: string;
|
|
1498
|
+
isSafe: boolean;
|
|
1499
|
+
code?: string;
|
|
1500
|
+
}
|
|
1501
|
+
export interface SecurityRisk {
|
|
1502
|
+
type: string;
|
|
1503
|
+
severity: 'low' | 'medium' | 'high' | 'critical';
|
|
1504
|
+
description: string;
|
|
1505
|
+
recommendation: string;
|
|
1506
|
+
}
|
|
1507
|
+
export interface AccessibilityAnalysis {
|
|
1508
|
+
score: number;
|
|
1509
|
+
isAccessible: boolean;
|
|
1510
|
+
issues: AccessibilityIssue[];
|
|
1511
|
+
hasAltText: boolean;
|
|
1512
|
+
hasTableHeaders: boolean;
|
|
1513
|
+
hasDocumentTitle: boolean;
|
|
1514
|
+
hasLanguage: boolean;
|
|
1515
|
+
headingStructure: 'correct' | 'incorrect' | 'missing';
|
|
1516
|
+
readingOrder: 'correct' | 'incorrect' | 'unknown';
|
|
1517
|
+
}
|
|
1518
|
+
export interface AccessibilityIssue {
|
|
1519
|
+
type: string;
|
|
1520
|
+
wcagCriteria?: string;
|
|
1521
|
+
severity: 'minor' | 'moderate' | 'serious' | 'critical';
|
|
1522
|
+
description: string;
|
|
1523
|
+
element?: string;
|
|
1524
|
+
pageNumber?: number;
|
|
1525
|
+
recommendation: string;
|
|
1526
|
+
}
|
|
1527
|
+
export interface QualityAnalysis {
|
|
1528
|
+
overallScore: number;
|
|
1529
|
+
imageQuality: ImageQualityReport;
|
|
1530
|
+
textQuality: TextQualityReport;
|
|
1531
|
+
formattingQuality: FormattingQualityReport;
|
|
1532
|
+
recommendations: QualityRecommendation[];
|
|
1533
|
+
}
|
|
1534
|
+
export interface ImageQualityReport {
|
|
1535
|
+
totalImages: number;
|
|
1536
|
+
lowResolutionCount: number;
|
|
1537
|
+
optimalResolutionCount: number;
|
|
1538
|
+
averageDpi: number;
|
|
1539
|
+
totalSize: number;
|
|
1540
|
+
recommendations: string[];
|
|
1541
|
+
}
|
|
1542
|
+
export interface TextQualityReport {
|
|
1543
|
+
spellingErrors: number;
|
|
1544
|
+
grammarIssues: number;
|
|
1545
|
+
readabilityScore: number;
|
|
1546
|
+
inconsistencies: string[];
|
|
1547
|
+
}
|
|
1548
|
+
/**
 * Formatting-related counters of a quality report. Every member is a count.
 */
export interface FormattingQualityReport {
    /** Count of orphan lines. */
    orphanLines: number;
    /** Count of widow lines. */
    widowLines: number;
    /** Count of overflowing-text occurrences. */
    overflowingText: number;
    /** Count of inconsistent-spacing occurrences. */
    inconsistentSpacing: number;
}
|
|
1554
|
+
/**
 * A single quality recommendation, tagged with the area it concerns and a
 * priority.
 */
export interface QualityRecommendation {
    /** Area the recommendation belongs to. */
    area: 'images' | 'text' | 'formatting' | 'structure';
    /** Text describing the problem. */
    issue: string;
    /** Text describing the suggested action. */
    recommendation: string;
    /** Priority level. */
    priority: 'low' | 'medium' | 'high';
}
|
|
1560
|
+
/**
 * Summary of a document: title, abstract, key points and size figures.
 */
export interface DocumentSummary {
    /** Document title. */
    title: string;
    /** Abstract text. */
    abstract: string;
    /** Key points, one string per entry. */
    keyPoints: Array<string>;
    /** Word count. */
    wordCount: number;
    /** Estimated reading time (unit is not stated in this declaration — TODO confirm). */
    estimatedReadTime: number;
}
|
|
1567
|
+
/**
 * Document comparison options.
 *
 * Every member is optional. Defaults are not visible in this declaration;
 * check the implementation before relying on them.
 */
export interface ComparisonOptions {
    /** Toggle for comparing text. */
    compareText?: boolean;
    /** Toggle for comparing formatting. */
    compareFormatting?: boolean;
    /** Toggle for comparing images. */
    compareImages?: boolean;
    /** Toggle for comparing tables. */
    compareTables?: boolean;
    /** Toggle for comparing metadata. */
    compareMetadata?: boolean;
    /** Toggle for ignoring whitespace. */
    ignoreWhitespace?: boolean;
    /** Toggle for ignoring letter case. */
    ignoreCase?: boolean;
    /**
     * Toggle for ignoring formatting.
     * NOTE(review): interaction with `compareFormatting` is not defined here — confirm.
     */
    ignoreFormatting?: boolean;
    /** Comparison unit. */
    granularity?: 'character' | 'word' | 'sentence' | 'paragraph' | 'page';
    /** Shape of the produced output. */
    outputFormat?: 'detailed' | 'summary' | 'visual';
}
|
|
1582
|
+
/**
 * Document comparison result.
 */
export interface ComparisonResult {
    /** Flag stating whether the compared documents are identical. */
    areIdentical: boolean;
    /** Numeric similarity score (scale is not specified by this declaration). */
    similarityScore: number;
    /** Recorded differences. */
    differences: Array<DocumentDifference>;
    /** Changes classified as additions. */
    additions: Array<DocumentChange>;
    /** Changes classified as deletions. */
    deletions: Array<DocumentChange>;
    /** Changes classified as modifications. */
    modifications: Array<DocumentChange>;
    /** Aggregate counters for the comparison. */
    summary: ComparisonSummary;
}
|
|
1594
|
+
/**
 * One difference between two documents: what kind it is, where it is, and
 * the content involved.
 */
export interface DocumentDifference {
    /** Kind of difference. */
    type: 'addition' | 'deletion' | 'modification' | 'move' | 'format';
    /** Where the difference is located. */
    location: {
        /** Which of the two documents the location refers to. */
        document: 'source' | 'target';
        /** Page number; may be omitted. */
        pageNumber?: number;
        /** Numeric position; may be omitted (unit is not specified here). */
        position?: number;
        /** Path string; may be omitted (format is not specified here). */
        path?: string;
    };
    /** Content involved; both members are optional. */
    content: {
        /** Original content. */
        original?: string;
        /** Modified content. */
        modified?: string;
    };
    /** Arbitrary extra data keyed by string. */
    metadata?: Record<string, unknown>;
}
|
|
1608
|
+
/**
 * A single change entry: what kind of content changed, the content itself,
 * and its location as a string.
 */
export interface DocumentChange {
    /** Kind of content the change concerns. */
    type: 'text' | 'image' | 'table' | 'style' | 'metadata';
    /** Changed content, as a string. */
    content: string;
    /** Location, as a string (format is not specified here). */
    location: string;
    /** Arbitrary extra data keyed by string. */
    details?: Record<string, unknown>;
}
|
|
1614
|
+
/**
 * Aggregate counters for a comparison. Every member is a count.
 */
export interface ComparisonSummary {
    /** Total number of changes. */
    totalChanges: number;
    /** Characters added. */
    addedCharacters: number;
    /** Characters deleted. */
    deletedCharacters: number;
    /** Words added. */
    addedWords: number;
    /** Words deleted. */
    deletedWords: number;
    /** Paragraphs modified. */
    modifiedParagraphs: number;
    /** Images added. */
    addedImages: number;
    /** Images deleted. */
    deletedImages: number;
    /** Tables modified. */
    modifiedTables: number;
}
|
|
1625
|
+
/**
 * Search options.
 *
 * Every member is optional. Several members come in similarly named pairs
 * (`regex`/`useRegex`, `fuzzy`/`fuzzyMatch`); this declaration does not say
 * which one takes precedence.
 */
export interface SearchOptions {
    /** Query string. */
    query?: string;
    /** Toggle for case-sensitive matching. */
    caseSensitive?: boolean;
    /** Toggle for whole-word matching. */
    wholeWord?: boolean;
    /** Toggle for regular-expression matching. */
    regex?: boolean;
    /** NOTE(review): looks like an alias of `regex` — confirm against the implementation. */
    useRegex?: boolean;
    /** Toggle for fuzzy matching. */
    fuzzy?: boolean;
    /** NOTE(review): looks like an alias of `fuzzy` — confirm against the implementation. */
    fuzzyMatch?: boolean;
    /** Fuzzy-matching threshold (range is not specified here). */
    fuzzyThreshold?: number;
    /** Upper bound on the number of results. */
    maxResults?: number;
    /** Toggle for highlighting matches. */
    highlightMatches?: boolean;
    /** Toggle for including surrounding context. */
    includeContext?: boolean;
    /** Context length (unit is not stated here — presumably characters; TODO confirm). */
    contextLength?: number;
    /** Tag used for highlighting, as a string. */
    highlightTag?: string;
    /** Content areas to search in. */
    searchIn?: Array<'text' | 'metadata' | 'comments' | 'annotations'>;
    /** Field names to search, as free-form strings. */
    searchFields?: Array<string>;
    /** File formats to restrict the search to. */
    fileTypes?: Array<FileFormat>;
    /** Date bounds; either end may be omitted. */
    dateRange?: {
        from?: Date;
        to?: Date;
    };
    /** Sort key. */
    sortBy?: 'relevance' | 'date' | 'name' | 'size' | 'position' | 'document';
    /** Sort direction; both short and long spellings are accepted by the type. */
    sortOrder?: 'asc' | 'desc' | 'ascending' | 'descending';
}
|
|
1652
|
+
/**
 * Search result.
 *
 * Every member is optional. Some members come in similarly named pairs
 * (`totalMatches`/`totalHits`, `duration`/`searchDuration`); this declaration
 * does not say how they relate.
 */
export interface SearchResult {
    /** Query string the result belongs to. */
    query?: string;
    /** Total number of matches. */
    totalMatches?: number;
    /** NOTE(review): relation to `totalMatches` is not defined here — confirm. */
    totalHits?: number;
    /** Individual hits. */
    hits?: {
        /** Hit identifier. */
        id: string;
        /** Identifier of the document containing the hit. */
        documentId: string;
        /** Name of the document containing the hit. */
        documentName: string;
        /** Text that matched. */
        matchedText: string;
        /** Context string for the match. */
        context: string;
        /** Start position of the match. */
        startPosition: number;
        /** End position of the match (inclusive/exclusive is not specified here). */
        endPosition: number;
        /** Line number; may be omitted. */
        lineNumber?: number;
        /** Page number; may be omitted. */
        pageNumber?: number;
        /** Section label; may be omitted. */
        section?: string;
        /** Numeric score of the hit. */
        score: number;
        /** Snippet string with highlighting applied. */
        highlightedSnippet: string;
    }[];
    /** Per-document results. */
    documents?: Array<DocumentSearchResult>;
    /** Facets, either as a `SearchFacets` object or as value/count lists keyed by string. */
    facets?: SearchFacets | Record<string, {
        value: string;
        count: number;
    }[]>;
    /** Suggested query strings. */
    suggestions?: Array<string>;
    /** Duration figure (unit is not stated here — TODO confirm). */
    duration?: number;
    /** NOTE(review): relation to `duration` is not defined here — confirm. */
    searchDuration?: number;
}
|
|
1682
|
+
/**
 * Search result for one document: the file, its format, the matches found
 * in it and a score.
 */
export interface DocumentSearchResult {
    /** File reference, as a string. */
    file: string;
    /** Format of the file. */
    format: FileFormat;
    /** Matches found in the file. */
    matches: Array<SearchMatch>;
    /** Numeric score of the document. */
    score: number;
    /** Highlight strings; may be omitted. */
    highlights?: Array<string>;
    /** Document metadata; may be omitted. */
    metadata?: DocumentMetadata;
}
|
|
1690
|
+
/**
 * One match inside a document: the matched content, its context, position,
 * score and the kind of content it was found in.
 */
export interface SearchMatch {
    /** Matched content. */
    content: string;
    /** Context string for the match. */
    context: string;
    /** Position of the match. */
    position: {
        /** Start offset. */
        start: number;
        /** End offset (inclusive/exclusive is not specified here). */
        end: number;
        /** Page number; may be omitted. */
        pageNumber?: number;
        /** Line number; may be omitted. */
        lineNumber?: number;
    };
    /** Numeric score of the match. */
    score: number;
    /** Kind of content the match was found in. */
    type: 'text' | 'metadata' | 'comment' | 'annotation';
}
|
|
1702
|
+
/**
 * Search facets. Each member maps a string key to a number (presumably an
 * occurrence count — confirm against the producer).
 */
export interface SearchFacets {
    /** Numbers keyed by format. */
    formats: Record<string, number>;
    /** Numbers keyed by date. */
    dates: Record<string, number>;
    /** Numbers keyed by author. */
    authors: Record<string, number>;
    /** Numbers keyed by keyword. */
    keywords: Record<string, number>;
}
|
|
1708
|
+
/**
 * Index options for search optimization.
 *
 * Every member is optional; defaults are not visible in this declaration.
 */
export interface IndexOptions {
    /** Field names, as free-form strings. */
    fields?: Array<string>;
    /** Toggle for stemming. */
    stemming?: boolean;
    /** Either an explicit word list or a boolean toggle. */
    stopWords?: Array<string> | boolean;
    /** Minimum word length. */
    minWordLength?: number;
    /** Maximum word length. */
    maxWordLength?: number;
    /** Numeric boost values keyed by string (presumably field names — confirm). */
    boost?: Record<string, number>;
    /** Text analyzers keyed by string (presumably field names — confirm). */
    analyzers?: Record<string, TextAnalyzer>;
}
|
|
1720
|
+
/**
 * Text analyzer configuration: a tokenizer choice, a list of filters and an
 * optional tokenizer function.
 */
export interface TextAnalyzer {
    /** Tokenizer selection. */
    tokenizer: 'standard' | 'whitespace' | 'letter' | 'custom';
    /** Filters to use, drawn from the literals below. */
    filters: ('lowercase' | 'stopwords' | 'stemmer' | 'synonyms' | 'ngram')[];
    /**
     * Function turning a text into a list of tokens.
     * Presumably consulted when `tokenizer` is 'custom' — confirm against the implementation.
     */
    customTokenizer?: (text: string) => string[];
}
|
|
1725
|
+
/**
 * Batch job configuration.
 *
 * Every member is optional. Some members come in similarly named pairs
 * (`outputDir`/`outputDirectory`, `retries`/`retryAttempts`); this
 * declaration does not say which one takes precedence.
 */
export interface BatchJobConfig {
    /** Job identifier. */
    id?: string;
    /** Job name. */
    name?: string;
    /** Inputs: each entry is a string, a Buffer, or a `{ data, name }` pair. */
    files?: (string | Buffer | {
        data: InputDataType;
        name: string;
    })[];
    /** Operation to perform. */
    operation?: 'convert' | 'extract' | 'analyze' | 'compare' | 'search';
    /** Options object; its accepted shapes are the option types listed in the union. */
    options?: ConvertFileOptions | PDFExtractionOptions | AnalysisOptions | ComparisonOptions | SearchOptions;
    /** Default options as an untyped string-keyed record. */
    defaultOptions?: Record<string, unknown>;
    /** Output directory. */
    outputDir?: string;
    /** NOTE(review): looks like an alias of `outputDir` — confirm against the implementation. */
    outputDirectory?: string;
    /** Concurrency level. */
    concurrency?: number;
    /** Retry count. */
    retries?: number;
    /** NOTE(review): looks like an alias of `retries` — confirm against the implementation. */
    retryAttempts?: number;
    /** Delay between retries (unit is not stated here — TODO confirm). */
    retryDelay?: number;
    /** Timeout (unit is not stated here — TODO confirm). */
    timeout?: number;
    /** Toggle for continuing after an error. */
    continueOnError?: boolean;
    /** Callback receiving a `BatchProgress` value. */
    onProgress?: (progress: BatchProgress) => void;
    /** Callback receiving a `BatchFileResult` value. */
    onFileComplete?: (result: BatchFileResult) => void;
    /** Callback receiving an error together with a file string. */
    onError?: (error: Error, file: string) => void;
}
|
|
1750
|
+
/**
 * Batch job status: one of six string literals.
 */
export type BatchJobStatus =
    | 'pending'
    | 'processing'
    | 'paused'
    | 'completed'
    | 'failed'
    | 'cancelled';
|
|
1754
|
+
/**
 * Batch progress information.
 */
export interface BatchProgress {
    /** Identifier of the job. */
    jobId: string;
    /** Total number of files. */
    totalFiles: number;
    /** Number of completed files. */
    completedFiles: number;
    /** Number of failed files. */
    failedFiles: number;
    /** File currently being handled; may be omitted. */
    currentFile?: string;
    /** Progress figure (scale is not stated here — fraction or percentage; TODO confirm). */
    progress: number;
    /** Estimated time remaining (unit is not stated here — TODO confirm). */
    estimatedTimeRemaining?: number;
    /** Start timestamp. */
    startedAt: Date;
    /**
     * Current status.
     * NOTE(review): this inline union uses 'running' where the `BatchJobStatus`
     * alias uses 'processing' — confirm whether the difference is intentional.
     */
    status: 'pending' | 'running' | 'paused' | 'completed' | 'failed' | 'cancelled';
}
|
|
1768
|
+
/**
 * Batch file result: outcome of handling one file in a batch.
 */
export interface BatchFileResult {
    /** File reference, as a string. */
    file: string;
    /** Outcome flag. */
    success: boolean;
    /** Result payload; may be omitted. */
    result?: ConversionResult | ExtractionResult | AnalysisResult;
    /** Error object; may be omitted. */
    error?: Error;
    /** Duration figure (unit is not stated here — TODO confirm). */
    duration: number;
    /** Output path; may be omitted. */
    outputPath?: string;
}
|
|
1779
|
+
/**
 * Batch job result.
 *
 * Only `jobId` is required. Many members come in similarly named pairs
 * (`totalFiles`/`totalItems`, `successCount`/`successfulItems`,
 * `failedCount`/`failedItems`, `duration`/`totalDuration`,
 * `startedAt`/`startTime`, `completedAt`/`endTime`); this declaration does
 * not say which producer fills which — confirm before reading either.
 */
export interface BatchJobResult {
    /** Identifier of the job. */
    jobId: string;
    /** Outcome flag. */
    success?: boolean;
    /** Final status; 'partial' is a permitted value. */
    status?: 'completed' | 'partial' | 'failed';
    /** Total number of files. */
    totalFiles?: number;
    /** Total number of items. */
    totalItems?: number;
    /** Number of successes. */
    successCount?: number;
    /** Number of successful items. */
    successfulItems?: number;
    /** Number of failures. */
    failedCount?: number;
    /** Number of failed items. */
    failedItems?: number;
    /** Per-file results, or an array of untyped values. */
    results?: BatchFileResult[] | unknown[];
    /** Error entries; each carries an error plus an optional file and item id. */
    errors?: {
        file?: string;
        itemId?: string;
        error: string | Error;
    }[];
    /** Duration figure (unit is not stated here — TODO confirm). */
    duration?: number;
    /** Total duration figure (unit is not stated here — TODO confirm). */
    totalDuration?: number;
    /** Average per-item duration (unit is not stated here — TODO confirm). */
    averageItemDuration?: number;
    /** Start timestamp. */
    startedAt?: Date;
    /** Start timestamp (second spelling). */
    startTime?: Date;
    /** Completion timestamp. */
    completedAt?: Date;
    /** Completion timestamp (second spelling). */
    endTime?: Date;
}
|
|
1806
|
+
/**
 * Stream processing options.
 *
 * Every member is optional; defaults are not visible in this declaration.
 */
export interface StreamProcessingOptions {
    /** Chunk size (unit is not stated here — presumably bytes; TODO confirm). */
    chunkSize?: number;
    /** High-water mark value. */
    highWaterMark?: number;
    /** Buffer encoding name. */
    encoding?: BufferEncoding;
    /** Callback receiving a chunk and its index; may return a promise. */
    onChunk?: (chunk: Buffer, index: number) => void | Promise<void>;
    /** Callback receiving the bytes processed and, optionally, the total bytes. */
    onProgress?: (bytesProcessed: number, totalBytes?: number) => void;
    /** Function mapping a chunk to a Buffer, synchronously or via a promise. */
    transform?: (chunk: Buffer) => Buffer | Promise<Buffer>;
    /** Predicate over a chunk (whether `true` keeps or drops the chunk is not stated here — confirm). */
    filter?: (chunk: Buffer) => boolean;
    /** Memory limit (unit is not stated here — TODO confirm). */
    maxMemory?: number;
    /** Temporary directory. */
    tempDir?: string;
    /** Toggle for pausing on backpressure. */
    pauseOnBackpressure?: boolean;
    /** Toggle for emitting progress. */
    emitProgress?: boolean;
    /** Progress interval (unit is not stated here — TODO confirm). */
    progressInterval?: number;
}
|
|
1823
|
+
/**
 * Stream result: a readable byte stream, metadata about it, and an abort
 * function.
 */
export interface StreamResult {
    /** Readable stream of `Uint8Array` chunks. */
    stream: ReadableStream<Uint8Array>;
    /** Metadata describing the stream. */
    metadata: {
        /** Total size; may be omitted (unit is not stated here — TODO confirm). */
        totalSize?: number;
        /** Number of chunks; may be omitted. */
        chunkCount?: number;
        /** File format. */
        format: FileFormat;
        /** MIME type string. */
        mimeType: string;
    };
    /** Zero-argument abort function. */
    abort: () => void;
}
|
|
1836
|
+
/**
 * Extractor plugin interface.
 */
export interface ExtractorPlugin {
    /** Plugin name. */
    name: string;
    /** Plugin version string. */
    version: string;
    /** Formats the plugin declares support for. */
    supportedFormats: Array<FileFormat>;
    /**
     * Extracts from the given data.
     *
     * @param data - Input data.
     * @param options - Untyped, string-keyed options.
     * @returns A promise of the extraction result.
     */
    extract(data: InputDataType, options: Record<string, unknown>): Promise<ExtractionResult>;
    /**
     * Optional check for whether the plugin can extract from the given data.
     *
     * @param data - Input data.
     */
    canExtract?(data: InputDataType): boolean;
}
|
|
1846
|
+
/**
 * Analyzer plugin interface.
 */
export interface AnalyzerPlugin {
    /** Plugin name. */
    name: string;
    /** Plugin version string. */
    version: string;
    /** Formats the plugin declares support for. */
    supportedFormats: Array<FileFormat>;
    /**
     * Analyzes the given data.
     *
     * @param data - Input data.
     * @param options - Analysis options.
     * @returns A promise of the analysis result.
     */
    analyze(data: InputDataType, options: AnalysisOptions): Promise<AnalysisResult>;
}
|
|
1855
|
+
/**
 * Processor plugin interface (for transformations).
 */
export interface ProcessorPlugin {
    /** Plugin name. */
    name: string;
    /** Plugin version string. */
    version: string;
    /** Formats the plugin declares support for. */
    supportedFormats: Array<FileFormat>;
    /**
     * Processes a Buffer into another Buffer.
     *
     * @param data - Input buffer.
     * @param options - Untyped, string-keyed options.
     * @returns A promise of the output buffer.
     */
    process(data: Buffer, options: Record<string, unknown>): Promise<Buffer>;
    /** Numeric priority (ordering direction is not stated here — TODO confirm). */
    priority?: number;
}
|
|
1864
|
+
}
|
|
1865
|
+
/**
|
|
1866
|
+
* Hook types for extensibility
|
|
1867
|
+
*/
|
|
1868
|
+
export interface ConvertitHooks {
|
|
1869
|
+
beforeExtract?: (data: InputDataType, options: Record<string, unknown>) => Promise<InputDataType>;
|
|
1870
|
+
afterExtract?: <T>(result: ExtractionResult<T>) => Promise<ExtractionResult<T>>;
|
|
1871
|
+
beforeAnalyze?: (data: InputDataType, options: AnalysisOptions) => Promise<InputDataType>;
|
|
1872
|
+
afterAnalyze?: (result: AnalysisResult) => Promise<AnalysisResult>;
|
|
1873
|
+
beforeBatch?: (config: BatchJobConfig) => Promise<BatchJobConfig>;
|
|
1874
|
+
afterBatch?: (result: BatchJobResult) => Promise<BatchJobResult>;
|
|
1875
|
+
onError?: (error: Error, context: Record<string, unknown>) => void;
|
|
1876
|
+
}
|
|
1877
|
+
/**
 * Extractor configuration.
 *
 * Every member is optional; defaults are not visible in this declaration.
 */
export interface ExtractorConfig {
    /** Temporary directory. */
    tempDir?: string;
    /** Maximum file size (unit is not stated here — presumably bytes; TODO confirm). */
    maxFileSize?: number;
    /** Timeout (unit is not stated here — TODO confirm). */
    timeout?: number;
    /** Toggle for caching. */
    cache?: boolean;
    /** Cache directory. */
    cacheDir?: string;
    /** Cache time-to-live (unit is not stated here — TODO confirm). */
    cacheTTL?: number;
    /** Toggle for verbose behavior. */
    verbose?: boolean;
    /** Extractor plugins. */
    plugins?: Array<ExtractorPlugin>;
}
|
|
1890
|
+
/**
 * Universal extraction options: one optional options object per supported
 * input family, plus detection settings.
 */
export interface UniversalExtractionOptions {
    /** PDF extraction options. */
    pdf?: PDFExtractionOptions;
    /** Word extraction options. */
    word?: WordExtractionOptions;
    /** Excel extraction options. */
    excel?: ExcelExtractionOptions;
    /** CSV extraction options. */
    csv?: CSVExtractionOptions;
    /** Image extraction options. */
    image?: ImageExtractionOptions;
    /** Toggle for automatic detection (presumably of the input format — confirm). */
    autoDetect?: boolean;
    /** Fallback format (presumably used when detection does not yield one — confirm). */
    fallbackFormat?: FileFormat;
}
|
|
702
1902
|
//# sourceMappingURL=types.d.ts.map
|