reykit 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reykit/ros.py ADDED
@@ -0,0 +1,1917 @@
1
+ # !/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ @Time : 2023-05-09 15:30:10
6
+ @Author : Rey
7
+ @Contact : reyxbo@163.com
8
+ @Explain : Operation system methods.
9
+ """
10
+
11
+
12
+ from __future__ import annotations
13
+ from typing import Any, Union, Literal, Optional, NoReturn, overload
14
+ from io import TextIOBase, BufferedIOBase
15
+ from os import (
16
+ walk as os_walk,
17
+ listdir as os_listdir,
18
+ makedirs as os_makedirs,
19
+ renames as os_renames,
20
+ remove as os_remove
21
+ )
22
+ from os.path import (
23
+ abspath as os_abspath,
24
+ join as os_join,
25
+ isfile as os_isfile,
26
+ isdir as os_isdir,
27
+ basename as os_basename,
28
+ dirname as os_dirname,
29
+ exists as os_exists,
30
+ getsize as os_getsize,
31
+ getctime as os_getctime,
32
+ getmtime as os_getmtime,
33
+ getatime as os_getatime,
34
+ split as os_split,
35
+ splitext as os_splitext,
36
+ splitdrive as os_splitdrive
37
+ )
38
+ from shutil import copy as shutil_copy
39
+ from json import JSONDecodeError
40
+ from tomllib import loads as tomllib_loads
41
+ from hashlib import md5 as hashlib_md5
42
+ from tempfile import TemporaryFile, TemporaryDirectory
43
+ from docx import Document as docx_document
44
+ from docx.document import Document
45
+ from docx.text.paragraph import Paragraph
46
+ from docx.table import Table
47
+ from docx.oxml.text.paragraph import CT_P
48
+ from docx.oxml.table import CT_Tbl
49
+ from lxml.etree import ElementChildIterator
50
+ from pdfplumber import open as pdfplumber_open
51
+
52
+ from .rexception import throw
53
+ from .rregex import search, sub
54
+ from .rsystem import dos_command
55
+ from .rtext import to_json
56
+
57
+
58
# Public names of this module; this tuple is what a star import exposes.
__all__ = (
    'get_md5',
    'create_folder',
    'find_relpath',
    'get_file_str',
    'get_file_bytes',
    'read_toml',
    'RFile',
    'RFolder',
    'RTempFile',
    'RTempFolder',
    'doc_to_docx',
    'extract_docx_content',
    'extract_pdf_content',
    'extract_file_content'
)
74
+
75
+
76
# Type aliases naming the forms in which a file can be handed to a function.
# `FilePath`, `FileText` and `FileData` are plain `str` / `str` / `bytes`;
# the distinct names only document the intended meaning of the value.
type FilePath = str
type FileText = str
type FileData = bytes
# String-oriented source: a path, the text itself, or a text IO object.
type FileStr = Union[FilePath, FileText, TextIOBase]
# Bytes-oriented source: a path, the bytes themselves, or a buffered binary IO object.
type FileBytes = Union[FilePath, FileData, BufferedIOBase]
# Any of the accepted file sources.
type File = Union[FileStr, FileBytes]
82
+
83
+
84
+ def get_md5(file: Union[str, bytes]) -> str:
85
+ """
86
+ Get file MD5.
87
+
88
+ Parameters
89
+ ----------
90
+ file : File path or file bytes.
91
+
92
+ Returns
93
+ -------
94
+ File MD5.
95
+ """
96
+
97
+ # Get bytes.
98
+ match file:
99
+
100
+ ## Path.
101
+ case str():
102
+ rfile = RFile(file)
103
+ file_bytes = rfile.bytes
104
+
105
+ ## Bytes.
106
+ case bytes() | bytearray():
107
+ file_bytes = file
108
+
109
+ # Calculate.
110
+ hash = hashlib_md5(file_bytes)
111
+ md5 = hash.hexdigest()
112
+
113
+ return md5
114
+
115
+
116
def create_folder(*paths: str, report: bool = False) -> None:
    """
    Create every given folder.

    Parameters
    ----------
    paths : Folder paths, handled one after another in the given order.
    report : Whether report the creation process, passed on to `RFolder.create`.
    """

    # Create each folder through its `RFolder` wrapper.
    for folder_path in paths:
        RFolder(folder_path).create(report)
130
+
131
+
132
def find_relpath(abspath: str, relpath: str) -> str:
    """
    Resolve a dot-prefixed relative path against an absolute path.

    Every leading `.` of `relpath` removes one trailing component from `abspath`.
    The path separators directly after the dots are dropped,
    and the remainder is joined onto the shortened path.

    Parameters
    ----------
    abspath : Original absolute path.
    relpath : Relative path, starting with zero or more `.` characters.

    Returns
    -------
    New absolute path.

    Examples
    --------
    >>> find_relpath('/Folder1/Folder2/Folder3', '../Folder4/File.txt')
    '/Folder1/Folder4/File.txt'
    """

    # Split the relative path into leading dots, separators and the remainder.
    after_dots = relpath.lstrip('.')
    level = len(relpath) - len(after_dots)
    remainder = after_dots.lstrip('/\\')

    # Drop one trailing component of the base path per dot.
    base_path = abspath
    for _ in range(level):
        base_path = os_split(base_path)[0]

    return os_join(base_path, remainder)
178
+
179
+
180
def get_file_str(file: FileStr) -> str:
    """
    Get file string data.

    Parameters
    ----------
    file : File source.
        - `str` and an existing path: Read the file at this path as string data.
        - `str` and not an existing path: Return this string itself.
        - `TextIOBase`: Read string data from the IO object.

    Returns
    -------
    File string data.
    """

    # Path or string, an existing path wins over plain text.
    if isinstance(file, str):
        text = RFile(file).str if os_exists(file) else file

    # IO.
    elif isinstance(file, TextIOBase):
        text = file.read()

    # Throw exception.
    else:
        throw(TypeError, file)

    return text
220
+
221
+
222
@overload
def get_file_bytes(file: FileBytes) -> bytes: ...

@overload
def get_file_bytes(file: Any) -> NoReturn: ...

def get_file_bytes(file: FileBytes) -> bytes:
    """
    Get file bytes data.

    Parameters
    ----------
    file : File source.
        - `bytes`: Return this bytes data.
        - `bytearray`: Return a `bytes` copy of this data.
        - `str`: As a file path read bytes data.
        - `BufferedIOBase`: Read bytes data from the IO object.

    Returns
    -------
    File bytes data.
    """

    # Bytes.
    if isinstance(file, bytes):
        data = file
    elif isinstance(file, bytearray):
        data = bytes(file)

    # Path.
    elif isinstance(file, str):
        data = RFile(file).bytes

    # IO.
    elif isinstance(file, BufferedIOBase):
        data = file.read()

    # Throw exception.
    else:
        throw(TypeError, file)

    return data
266
+
267
+
268
+ def read_toml(path: Union[str, RFile]) -> dict[str, Any]:
269
+ """
270
+ Read and parse TOML file.
271
+ Treat nan as a None or null value.
272
+
273
+ Parameters
274
+ ----------
275
+ path : File path or RFile object.
276
+
277
+ Returns
278
+ -------
279
+ Parameter dictionary.
280
+ """
281
+
282
+ # Read.
283
+ match path:
284
+
285
+ ## File path.
286
+ case str():
287
+ rfile = RFile(path)
288
+ text = rfile.str
289
+
290
+ ## RFile object.
291
+ case RFile():
292
+ text = rfile.str
293
+
294
+ # Parse.
295
+
296
+ ## Handle nan.
297
+ parse_float = lambda float_str: None if float_str == "nan" else float_str
298
+
299
+ params = tomllib_loads(text, parse_float=parse_float)
300
+
301
+ return params
302
+
303
+
304
+ class RFile(object):
305
+ """
306
+ Rey's `file` type.
307
+ """
308
+
309
+
310
+ def __init__(
311
+ self,
312
+ path: str
313
+ ) -> None:
314
+ """
315
+ Build `file` attributes.
316
+
317
+ Parameters
318
+ ----------
319
+ path : File path.
320
+ """
321
+
322
+ # Set attribute.
323
+ self.path = os_abspath(path)
324
+
325
+
326
+ @overload
327
+ def read(
328
+ self,
329
+ type_: Literal['bytes'] = 'bytes'
330
+ ) -> bytes: ...
331
+
332
+ @overload
333
+ def read(
334
+ self,
335
+ type_: Literal['str'] = 'bytes'
336
+ ) -> str: ...
337
+
338
+ def read(
339
+ self,
340
+ type_: Literal['str', 'bytes'] = 'bytes'
341
+ ) -> Union[bytes, str]:
342
+ """
343
+ Read file data.
344
+
345
+ Parameters
346
+ ----------
347
+ type_ : File data type.
348
+ - `Literal['bytes']`: Return file bytes data.
349
+ - `Literal['str']`: Return file string data.
350
+
351
+ Returns
352
+ -------
353
+ File data.
354
+ """
355
+
356
+ # Handle parameter.
357
+ match type_:
358
+ case 'bytes':
359
+ mode = 'rb'
360
+ encoding = None
361
+ case 'str':
362
+ mode = 'r'
363
+ encoding='utf-8'
364
+
365
+ # Read.
366
+ with open(self.path, mode, encoding=encoding) as file:
367
+ content = file.read()
368
+
369
+ return content
370
+
371
+
372
+ def write(
373
+ self,
374
+ data: Optional[Any] = '',
375
+ append: bool = False
376
+ ) -> None:
377
+ """
378
+ Write file data.
379
+
380
+ Parameters
381
+ ----------
382
+ data : Write data.
383
+ - `str`: File text.
384
+ - `bytes`: File bytes data.
385
+ - `Any`: To JSON format or string.
386
+ append : Whether append data, otherwise overwrite data.
387
+ """
388
+
389
+ # Handle parameter.
390
+
391
+ ## Write mode.
392
+ if append:
393
+ mode = 'a'
394
+ else:
395
+ mode = 'w'
396
+ if data.__class__ in (bytes, bytearray):
397
+ mode += 'b'
398
+
399
+ ## Convert data to string.
400
+ if data.__class__ not in (str, bytes, bytearray):
401
+ try:
402
+ data = to_json(data)
403
+ except (JSONDecodeError, TypeError):
404
+ data = str(data)
405
+
406
+ # Write.
407
+ with open(self.path, mode) as file:
408
+ file.write(data)
409
+
410
+
411
+ def copy(
412
+ self,
413
+ path: str
414
+ ) -> None:
415
+ """
416
+ Copy file to path.
417
+
418
+ Parameters
419
+ ----------
420
+ path : Copy path.
421
+ """
422
+
423
+ # Copy.
424
+ shutil_copy(
425
+ self.path,
426
+ path
427
+ )
428
+
429
+
430
+ def move(
431
+ self,
432
+ path: str
433
+ ) -> None:
434
+ """
435
+ Move file to path.
436
+
437
+ Parameters
438
+ ----------
439
+ path : Move path.
440
+ """
441
+
442
+ # Move.
443
+ os_renames(
444
+ self.path,
445
+ path
446
+ )
447
+
448
+
449
+ def remove(
450
+ self
451
+ ) -> None:
452
+ """
453
+ Remove file.
454
+ """
455
+
456
+ # Copy.
457
+ try:
458
+ os_remove(self.path)
459
+
460
+ # Read only.
461
+ except PermissionError:
462
+ command = f'attrib -r "{self.path}"'
463
+ dos_command(command)
464
+ os_remove(self.path)
465
+
466
+
467
+ @property
468
+ def str(self) -> str:
469
+ """
470
+ Read content as a string.
471
+
472
+ Returns
473
+ -------
474
+ File string content.
475
+ """
476
+
477
+ # Read.
478
+ file_str = self.read('str')
479
+
480
+ return file_str
481
+
482
+
483
+ @property
484
+ def bytes(self) -> bytes:
485
+ """
486
+ Read content in byte form.
487
+
488
+ Returns
489
+ -------
490
+ File bytes content.
491
+ """
492
+
493
+ # Read.
494
+ file_bytes = self.read('bytes')
495
+
496
+ return file_bytes
497
+
498
+
499
+ @property
500
+ def name_suffix(self) -> str:
501
+ """
502
+ Return file name with suffix.
503
+
504
+ Returns
505
+ -------
506
+ File name with suffix.
507
+ """
508
+
509
+ # Get.
510
+ file_name_suffix = os_basename(self.path)
511
+
512
+ return file_name_suffix
513
+
514
+
515
+ @property
516
+ def name(self) -> str:
517
+ """
518
+ Return file name not with suffix.
519
+
520
+ Returns
521
+ -------
522
+ File name not with suffix.
523
+ """
524
+
525
+ # Get.
526
+ file_name, _ = os_splitext(self.name_suffix)
527
+
528
+ return file_name
529
+
530
+
531
+ @property
532
+ def suffix(self) -> str:
533
+ """
534
+ Return file suffix.
535
+
536
+ Returns
537
+ -------
538
+ File suffix.
539
+ """
540
+
541
+ # Get.
542
+ _, file_suffix = os_splitext(self.path)
543
+
544
+ return file_suffix
545
+
546
+
547
+ @property
548
+ def dir(self) -> str:
549
+ """
550
+ Return file directory.
551
+
552
+ Returns
553
+ -------
554
+ File directory.
555
+ """
556
+
557
+ # Get.
558
+ file_dir = os_dirname(self.path)
559
+
560
+ return file_dir
561
+
562
+
563
+ @property
564
+ def drive(self) -> str:
565
+ """
566
+ Return file drive letter.
567
+
568
+ Returns
569
+ -------
570
+ File drive letter.
571
+ """
572
+
573
+ # Get.
574
+ file_drive, _ = os_splitdrive(self.path)
575
+
576
+ return file_drive
577
+
578
+
579
+ @property
580
+ def size(self) -> int:
581
+ """
582
+ Return file byte size.
583
+
584
+ Returns
585
+ -------
586
+ File byte size.
587
+ """
588
+
589
+ # Get.
590
+ file_size = os_getsize(self.path)
591
+
592
+ return file_size
593
+
594
+
595
+ @property
596
+ def ctime(self) -> float:
597
+ """
598
+ Return file create timestamp.
599
+
600
+ Returns
601
+ -------
602
+ File create timestamp.
603
+ """
604
+
605
+ # Get.
606
+ file_ctime = os_getctime(self.path)
607
+
608
+ return file_ctime
609
+
610
+
611
+ @property
612
+ def mtime(self) -> float:
613
+ """
614
+ Return file modify timestamp.
615
+
616
+ Returns
617
+ -------
618
+ File modify timestamp.
619
+ """
620
+
621
+ # Get.
622
+ file_mtime = os_getmtime(self.path)
623
+
624
+ return file_mtime
625
+
626
+
627
+ @property
628
+ def atime(self) -> float:
629
+ """
630
+ Return file access timestamp.
631
+
632
+ Returns
633
+ -------
634
+ File access timestamp.
635
+ """
636
+
637
+ # Get.
638
+ file_atime = os_getatime(self.path)
639
+
640
+ return file_atime
641
+
642
+
643
+ @property
644
+ def md5(self) -> float:
645
+ """
646
+ Return file MD5 value.
647
+
648
+ Returns
649
+ -------
650
+ File MD5 value
651
+ """
652
+
653
+ # Get.
654
+ file_md5 = get_md5(self.path)
655
+
656
+ return file_md5
657
+
658
+
659
+ @property
660
+ def toml(self) -> dict[str, Any]:
661
+ """
662
+ Read and parse TOML file.
663
+ Treat nan as a None or null value.
664
+
665
+ Returns
666
+ -------
667
+ Parameter dictionary.
668
+ """
669
+
670
+ # Read and parse.
671
+ params = read_toml(self.path)
672
+
673
+ return params
674
+
675
+
676
+ def __bool__(self) -> bool:
677
+ """
678
+ Judge if exist.
679
+
680
+ Returns
681
+ -------
682
+ Judge result.
683
+ """
684
+
685
+ # Judge.
686
+ file_exist = os_isfile(self.path)
687
+
688
+ return file_exist
689
+
690
+
691
+ def __len__(self) -> int:
692
+ """
693
+ Return file byte size.
694
+
695
+ Returns
696
+ -------
697
+ File byte size.
698
+ """
699
+
700
+ # Get.
701
+ file_size = self.size
702
+
703
+ return file_size
704
+
705
+
706
+ def __str__(self) -> str:
707
+ """
708
+ Read content as a string.
709
+
710
+ Returns
711
+ -------
712
+ File string content.
713
+ """
714
+
715
+ # Read.
716
+ file_text = self.str
717
+
718
+ return file_text
719
+
720
+
721
+ def __bytes__(self) -> bytes:
722
+ """
723
+ Read content in byte form.
724
+
725
+ Returns
726
+ -------
727
+ File bytes content.
728
+ """
729
+
730
+ # Read.
731
+ file_bytes = self.bytes
732
+
733
+ return file_bytes
734
+
735
+
736
+ def __contains__(
737
+ self,
738
+ value: Union[str, bytes]
739
+ ) -> bool:
740
+ """
741
+ Judge if file text contain value.
742
+
743
+ Parameters
744
+ ----------
745
+ value : Judge value.
746
+
747
+ Returns
748
+ -------
749
+ Judge result.
750
+ """
751
+
752
+ # Get parameter.
753
+ match value:
754
+ case str():
755
+ content = self.str
756
+ case bytes() | bytearray():
757
+ content = self.bytes
758
+ case _:
759
+ throw(TypeError, value)
760
+
761
+ # Judge.
762
+ judge = value in content
763
+
764
+ return judge
765
+
766
+
767
+ __call__ = write
768
+
769
+
770
+ class RFolder(object):
771
+ """
772
+ Rey's `folder` type.
773
+ """
774
+
775
+
776
+ def __init__(
777
+ self,
778
+ path: Optional[str] = None
779
+ ) -> None:
780
+ """
781
+ Build `folder` attributes.
782
+
783
+ Parameters
784
+ ----------
785
+ path : Folder path.
786
+ - `None`: Work folder path.
787
+ - `str`: Use this folder path.
788
+ """
789
+
790
+ # Set attribute.
791
+ if path is None:
792
+ path = ''
793
+ self.path = os_abspath(path)
794
+
795
+
796
+ def paths(
797
+ self,
798
+ target: Literal['all', 'file', 'folder'] = 'all',
799
+ recursion: bool = False
800
+ ) -> list:
801
+ """
802
+ Get the path of files and folders in the folder path.
803
+
804
+ Parameters
805
+ ----------
806
+ target : Target data.
807
+ - `Literal['all']`: Return file and folder path.
808
+ - `Literal['file']`: Return file path.
809
+ - `Literal['folder']`: Return folder path.
810
+ recursion : Is recursion directory.
811
+
812
+ Returns
813
+ -------
814
+ String is path.
815
+ """
816
+
817
+ # Get paths.
818
+ paths = []
819
+
820
+ ## Recursive.
821
+ if recursion:
822
+ obj_walk = os_walk(self.path)
823
+ match target:
824
+ case 'all':
825
+ targets_path = [
826
+ os_join(path, file_name)
827
+ for path, folders_name, files_name in obj_walk
828
+ for file_name in files_name + folders_name
829
+ ]
830
+ paths.extend(targets_path)
831
+ case 'file':
832
+ targets_path = [
833
+ os_join(path, file_name)
834
+ for path, _, files_name in obj_walk
835
+ for file_name in files_name
836
+ ]
837
+ paths.extend(targets_path)
838
+ case 'all' | 'folder':
839
+ targets_path = [
840
+ os_join(path, folder_name)
841
+ for path, folders_name, _ in obj_walk
842
+ for folder_name in folders_name
843
+ ]
844
+ paths.extend(targets_path)
845
+
846
+ ## Non recursive.
847
+ else:
848
+ names = os_listdir(self.path)
849
+ match target:
850
+ case 'all':
851
+ for name in names:
852
+ target_path = os_join(self.path, name)
853
+ paths.append(target_path)
854
+ case 'file':
855
+ for name in names:
856
+ target_path = os_join(self.path, name)
857
+ is_file = os_isfile(target_path)
858
+ if is_file:
859
+ paths.append(target_path)
860
+ case 'folder':
861
+ for name in names:
862
+ target_path = os_join(self.path, name)
863
+ is_dir = os_isdir(target_path)
864
+ if is_dir:
865
+ paths.append(target_path)
866
+
867
+ return paths
868
+
869
+
870
+ @overload
871
+ def search(
872
+ self,
873
+ pattern: str,
874
+ recursion: bool = False,
875
+ all_ : Literal[False] = False
876
+ ) -> Optional[str]: ...
877
+
878
+ @overload
879
+ def search(
880
+ self,
881
+ pattern: str,
882
+ recursion: bool = False,
883
+ all_ : Literal[True] = False
884
+ ) -> list[str]: ...
885
+
886
+ def search(
887
+ self,
888
+ pattern: str,
889
+ recursion: bool = False,
890
+ all_ : bool = False
891
+ ) -> Optional[str]:
892
+ """
893
+ Search file by name.
894
+
895
+ Parameters
896
+ ----------
897
+ pattern : Match file name pattern.
898
+ recursion : Is recursion directory.
899
+ all_ : Whether return all match file path, otherwise return first match file path.
900
+
901
+ Returns
902
+ -------
903
+ Match file path or null.
904
+ """
905
+
906
+ # Get paths.
907
+ file_paths = self.paths('file', recursion)
908
+
909
+ # All.
910
+ if all_:
911
+ match_paths = []
912
+ for path in file_paths:
913
+ file_name = os_basename(path)
914
+ result = search(pattern, file_name)
915
+ if result is not None:
916
+ match_paths.append(path)
917
+ return match_paths
918
+
919
+ # First.
920
+ else:
921
+ for path in file_paths:
922
+ file_name = os_basename(path)
923
+ result = search(pattern, file_name)
924
+ if result is not None:
925
+ return path
926
+
927
+
928
+ def create(
929
+ self,
930
+ report: bool = False
931
+ ) -> None:
932
+ """
933
+ Create folders.
934
+
935
+ Parameters
936
+ ----------
937
+ report : Whether report the creation process.
938
+ """
939
+
940
+ # Exist.
941
+ exist = os_exists(self.path)
942
+ if exist:
943
+ text = 'Folder already exists | %s' % self.path
944
+
945
+ # Not exist.
946
+ else:
947
+ os_makedirs(self.path)
948
+ text = 'Folder creation complete | %s' % self.path
949
+
950
+ # Report.
951
+ if report:
952
+ print(text)
953
+
954
+
955
+ def move(
956
+ self,
957
+ path: str
958
+ ) -> None:
959
+ """
960
+ Move folder to path.
961
+
962
+ Parameters
963
+ ----------
964
+ path : Move path.
965
+ """
966
+
967
+ # Move.
968
+ os_renames(
969
+ self.path,
970
+ path
971
+ )
972
+
973
+
974
+ @property
975
+ def name(self) -> str:
976
+ """
977
+ Return folder name.
978
+
979
+ Returns
980
+ -------
981
+ Folder name.
982
+ """
983
+
984
+ # Get.
985
+ folder_name = os_basename(self.path)
986
+
987
+ return folder_name
988
+
989
+
990
+ @property
991
+ def dir(self) -> str:
992
+ """
993
+ Return folder directory.
994
+
995
+ Returns
996
+ -------
997
+ Folder directory.
998
+ """
999
+
1000
+ # Get.
1001
+ folder_dir = os_dirname(self.path)
1002
+
1003
+ return folder_dir
1004
+
1005
+
1006
+ @property
1007
+ def drive(self) -> str:
1008
+ """
1009
+ Return folder drive letter.
1010
+
1011
+ Returns
1012
+ -------
1013
+ Folder drive letter.
1014
+ """
1015
+
1016
+ # Get.
1017
+ folder_drive, _ = os_splitdrive(self.path)
1018
+
1019
+ return folder_drive
1020
+
1021
+
1022
+ @property
1023
+ def size(self) -> int:
1024
+ """
1025
+ Return folder byte size, include all files in it.
1026
+
1027
+ Returns
1028
+ -------
1029
+ Folder byte size.
1030
+ """
1031
+
1032
+ # Get.
1033
+ file_paths = self.paths('file', True)
1034
+ file_sizes = [
1035
+ os_getsize(path)
1036
+ for path in file_paths
1037
+ ]
1038
+ folder_size = sum(file_sizes)
1039
+
1040
+ return folder_size
1041
+
1042
+
1043
+ @property
1044
+ def ctime(self) -> float:
1045
+ """
1046
+ Return file create timestamp.
1047
+
1048
+ Returns
1049
+ -------
1050
+ File create timestamp.
1051
+ """
1052
+
1053
+ # Get.
1054
+ folder_ctime = os_getctime(self.path)
1055
+
1056
+ return folder_ctime
1057
+
1058
+
1059
+ @property
1060
+ def mtime(self) -> float:
1061
+ """
1062
+ Return file modify timestamp.
1063
+
1064
+ Returns
1065
+ -------
1066
+ File modify timestamp.
1067
+ """
1068
+
1069
+ # Get.
1070
+ folder_mtime = os_getmtime(self.path)
1071
+
1072
+ return folder_mtime
1073
+
1074
+
1075
+ @property
1076
+ def atime(self) -> float:
1077
+ """
1078
+ Return file access timestamp.
1079
+
1080
+ Returns
1081
+ -------
1082
+ File access timestamp.
1083
+ """
1084
+
1085
+ # Get.
1086
+ folder_atime = os_getatime(self.path)
1087
+
1088
+ return folder_atime
1089
+
1090
+
1091
+ def __bool__(self) -> bool:
1092
+ """
1093
+ Judge if exist.
1094
+
1095
+ Returns
1096
+ -------
1097
+ Judge result.
1098
+ """
1099
+
1100
+ # Judge.
1101
+ folder_exist = os_isdir(self.path)
1102
+
1103
+ return folder_exist
1104
+
1105
+
1106
+ def __len__(self) -> int:
1107
+ """
1108
+ Return folder byte size, include all files in it.
1109
+
1110
+ Returns
1111
+ -------
1112
+ Folder byte size.
1113
+ """
1114
+
1115
+ # Get.
1116
+ folder_size = self.size
1117
+
1118
+ return folder_size
1119
+
1120
+
1121
+ def __contains__(self, pattern: str) -> bool:
1122
+ """
1123
+ Search file by name, recursion directory.
1124
+
1125
+ Parameters
1126
+ ----------
1127
+ pattern : Match file name pattern.
1128
+
1129
+ Returns
1130
+ -------
1131
+ Judge result.
1132
+ """
1133
+
1134
+ # Judge.
1135
+ result = self.search(pattern, True)
1136
+ judge = result is not None
1137
+
1138
+ return judge
1139
+
1140
+
1141
+ __call__ = paths
1142
+
1143
+
1144
+ class RTempFile(object):
1145
+ """
1146
+ Rey's `temporary file` type.
1147
+ """
1148
+
1149
+
1150
+ def __init__(
1151
+ self,
1152
+ dir_: Optional[str] = None,
1153
+ suffix: Optional[str] = None,
1154
+ type_: Literal['str', 'bytes'] = 'bytes'
1155
+ ) -> None:
1156
+ """
1157
+ Build `temporary file` attributes.
1158
+
1159
+ Parameters
1160
+ ----------
1161
+ dir_ : Directory path.
1162
+ suffix : File suffix.
1163
+ type_ : File data type.
1164
+ """
1165
+
1166
+ # Get parameter.
1167
+ match type_:
1168
+ case 'bytes':
1169
+ mode = 'w+b'
1170
+ case 'str':
1171
+ mode = 'w+'
1172
+ case _:
1173
+ throw(ValueError, type_)
1174
+
1175
+ # Set attribute.
1176
+ self.file = TemporaryFile(
1177
+ mode,
1178
+ suffix=suffix,
1179
+ dir=dir_
1180
+ )
1181
+ self.path = self.file.name
1182
+
1183
+
1184
+ def read(self) -> Union[bytes, str]:
1185
+ """
1186
+ Read file data.
1187
+
1188
+ Returns
1189
+ -------
1190
+ File data.
1191
+ """
1192
+
1193
+ # Read.
1194
+ self.file.seek(0)
1195
+ content = self.file.read()
1196
+
1197
+ return content
1198
+
1199
+
1200
+ def write(
1201
+ self,
1202
+ data: Union[str, bytes]
1203
+ ) -> None:
1204
+ """
1205
+ Write file data.
1206
+
1207
+ Parameters
1208
+ ----------
1209
+ data : Write data.
1210
+ """
1211
+
1212
+ # Write.
1213
+ self.file.write(data)
1214
+ self.file.seek(0)
1215
+
1216
+
1217
+ @property
1218
+ def name_suffix(self) -> str:
1219
+ """
1220
+ Return file name with suffix.
1221
+
1222
+ Returns
1223
+ -------
1224
+ File name with suffix.
1225
+ """
1226
+
1227
+ # Get.
1228
+ file_name_suffix = os_basename(self.path)
1229
+
1230
+ return file_name_suffix
1231
+
1232
+
1233
+ @property
1234
+ def name(self) -> str:
1235
+ """
1236
+ Return file name not with suffix.
1237
+
1238
+ Returns
1239
+ -------
1240
+ File name not with suffix.
1241
+ """
1242
+
1243
+ # Get.
1244
+ file_name, _ = os_splitext(self.name_suffix)
1245
+
1246
+ return file_name
1247
+
1248
+
1249
+ @property
1250
+ def suffix(self) -> str:
1251
+ """
1252
+ Return file suffix.
1253
+
1254
+ Returns
1255
+ -------
1256
+ File suffix.
1257
+ """
1258
+
1259
+ # Get.
1260
+ _, file_suffix = os_splitext(self.path)
1261
+
1262
+ return file_suffix
1263
+
1264
+
1265
+ @property
1266
+ def dir(self) -> str:
1267
+ """
1268
+ Return file directory.
1269
+
1270
+ Returns
1271
+ -------
1272
+ File directory.
1273
+ """
1274
+
1275
+ # Get.
1276
+ file_dir = os_dirname(self.path)
1277
+
1278
+ return file_dir
1279
+
1280
+
1281
+ @property
1282
+ def drive(self) -> str:
1283
+ """
1284
+ Return file drive letter.
1285
+
1286
+ Returns
1287
+ -------
1288
+ File drive letter.
1289
+ """
1290
+
1291
+ # Get.
1292
+ file_drive, _ = os_splitdrive(self.path)
1293
+
1294
+ return file_drive
1295
+
1296
+
1297
+ @property
1298
+ def size(self) -> int:
1299
+ """
1300
+ Return file byte size.
1301
+
1302
+ Returns
1303
+ -------
1304
+ File byte size.
1305
+ """
1306
+
1307
+ # Get.
1308
+ file_size = os_getsize(self.path)
1309
+
1310
+ return file_size
1311
+
1312
+
1313
+ @property
1314
+ def ctime(self) -> float:
1315
+ """
1316
+ Return file create timestamp.
1317
+
1318
+ Returns
1319
+ -------
1320
+ File create timestamp.
1321
+ """
1322
+
1323
+ # Get.
1324
+ file_ctime = os_getctime(self.path)
1325
+
1326
+ return file_ctime
1327
+
1328
+
1329
+ @property
1330
+ def mtime(self) -> float:
1331
+ """
1332
+ Return file modify timestamp.
1333
+
1334
+ Returns
1335
+ -------
1336
+ File modify timestamp.
1337
+ """
1338
+
1339
+ # Get.
1340
+ file_mtime = os_getmtime(self.path)
1341
+
1342
+ return file_mtime
1343
+
1344
+
1345
+ @property
1346
+ def atime(self) -> float:
1347
+ """
1348
+ Return file access timestamp.
1349
+
1350
+ Returns
1351
+ -------
1352
+ File access timestamp.
1353
+ """
1354
+
1355
+ # Get.
1356
+ file_atime = os_getatime(self.path)
1357
+
1358
+ return file_atime
1359
+
1360
+
1361
+ @property
1362
+ def md5(self) -> float:
1363
+ """
1364
+ Return file MD5 value.
1365
+
1366
+ Returns
1367
+ -------
1368
+ File MD5 value
1369
+ """
1370
+
1371
+ # Get.
1372
+ file_md5 = get_md5(self.path)
1373
+
1374
+ return file_md5
1375
+
1376
+
1377
+ def __len__(self) -> int:
1378
+ """
1379
+ Return file byte size.
1380
+
1381
+ Returns
1382
+ -------
1383
+ File byte size.
1384
+ """
1385
+
1386
+ # Get.
1387
+ file_size = self.size
1388
+
1389
+ return file_size
1390
+
1391
+
1392
+ def __contains__(
1393
+ self,
1394
+ value: Union[str, bytes]
1395
+ ) -> bool:
1396
+ """
1397
+ Judge if file text contain value.
1398
+
1399
+ Parameters
1400
+ ----------
1401
+ value : Judge value.
1402
+
1403
+ Returns
1404
+ -------
1405
+ Judge result.
1406
+ """
1407
+
1408
+ # Get parameter.
1409
+ content = self.read()
1410
+
1411
+ # Judge.
1412
+ judge = value in content
1413
+
1414
+ return judge
1415
+
1416
+
1417
+ def __del__(self) -> None:
1418
+ """
1419
+ Close temporary file.
1420
+ """
1421
+
1422
+ # Close.
1423
+ self.file.close()
1424
+
1425
+
1426
+ __call__ = write
1427
+
1428
+
1429
+ class RTempFolder(object):
1430
+ """
1431
+ Rey's `temporary folder` type.
1432
+ """
1433
+
1434
+
1435
+ def __init__(
1436
+ self,
1437
+ dir_: Optional[str] = None
1438
+ ) -> None:
1439
+ """
1440
+ Build `temporary folder` attributes.
1441
+
1442
+ Parameters
1443
+ ----------
1444
+ dir_ : Directory path.
1445
+ """
1446
+
1447
+ # Set attribute.
1448
+ self.folder = TemporaryDirectory(dir=dir_)
1449
+ self.path = os_abspath(self.folder.name)
1450
+
1451
+
1452
+ def paths(
1453
+ self,
1454
+ target: Literal['all', 'file', 'folder'] = 'all',
1455
+ recursion: bool = False
1456
+ ) -> list:
1457
+ """
1458
+ Get the path of files and folders in the folder path.
1459
+
1460
+ Parameters
1461
+ ----------
1462
+ target : Target data.
1463
+ - `Literal['all']`: Return file and folder path.
1464
+ - `Literal['file']`: Return file path.
1465
+ - `Literal['folder']`: Return folder path.
1466
+ recursion : Is recursion directory.
1467
+
1468
+ Returns
1469
+ -------
1470
+ String is path.
1471
+ """
1472
+
1473
+ # Get paths.
1474
+ paths = []
1475
+
1476
+ ## Recursive.
1477
+ if recursion:
1478
+ obj_walk = os_walk(self.path)
1479
+ match target:
1480
+ case 'all':
1481
+ targets_path = [
1482
+ os_join(path, file_name)
1483
+ for path, folders_name, files_name in obj_walk
1484
+ for file_name in files_name + folders_name
1485
+ ]
1486
+ paths.extend(targets_path)
1487
+ case 'file':
1488
+ targets_path = [
1489
+ os_join(path, file_name)
1490
+ for path, _, files_name in obj_walk
1491
+ for file_name in files_name
1492
+ ]
1493
+ paths.extend(targets_path)
1494
+ case 'all' | 'folder':
1495
+ targets_path = [
1496
+ os_join(path, folder_name)
1497
+ for path, folders_name, _ in obj_walk
1498
+ for folder_name in folders_name
1499
+ ]
1500
+ paths.extend(targets_path)
1501
+
1502
+ ## Non recursive.
1503
+ else:
1504
+ names = os_listdir(self.path)
1505
+ match target:
1506
+ case 'all':
1507
+ for name in names:
1508
+ target_path = os_join(self.path, name)
1509
+ paths.append(target_path)
1510
+ case 'file':
1511
+ for name in names:
1512
+ target_path = os_join(self.path, name)
1513
+ is_file = os_isfile(target_path)
1514
+ if is_file:
1515
+ paths.append(target_path)
1516
+ case 'folder':
1517
+ for name in names:
1518
+ target_path = os_join(self.path, name)
1519
+ is_dir = os_isdir(target_path)
1520
+ if is_dir:
1521
+ paths.append(target_path)
1522
+
1523
+ return paths
1524
+
1525
+
1526
+ @overload
1527
+ def search(
1528
+ self,
1529
+ pattern: str,
1530
+ recursion: bool = False,
1531
+ all_ : Literal[False] = False
1532
+ ) -> Optional[str]: ...
1533
+
1534
+ @overload
1535
+ def search(
1536
+ self,
1537
+ pattern: str,
1538
+ recursion: bool = False,
1539
+ all_ : Literal[True] = False
1540
+ ) -> list[str]: ...
1541
+
1542
+ def search(
1543
+ self,
1544
+ pattern: str,
1545
+ recursion: bool = False,
1546
+ all_ : bool = False
1547
+ ) -> Optional[str]:
1548
+ """
1549
+ Search file by name.
1550
+
1551
+ Parameters
1552
+ ----------
1553
+ pattern : Match file name pattern.
1554
+ recursion : Is recursion directory.
1555
+ all_ : Whether return all match file path, otherwise return first match file path.
1556
+
1557
+ Returns
1558
+ -------
1559
+ Match file path or null.
1560
+ """
1561
+
1562
+ # Get paths.
1563
+ file_paths = self.paths('file', recursion)
1564
+
1565
+ # All.
1566
+ if all_:
1567
+ match_paths = []
1568
+ for path in file_paths:
1569
+ file_name = os_basename(path)
1570
+ result = search(pattern, file_name)
1571
+ if result is not None:
1572
+ match_paths.append(path)
1573
+ return match_paths
1574
+
1575
+ # First.
1576
+ else:
1577
+ for path in file_paths:
1578
+ file_name = os_basename(path)
1579
+ result = search(pattern, file_name)
1580
+ if result is not None:
1581
+ return path
1582
+
1583
+
1584
+ @property
1585
+ def name(self) -> str:
1586
+ """
1587
+ Return folder name.
1588
+
1589
+ Returns
1590
+ -------
1591
+ Folder name.
1592
+ """
1593
+
1594
+ # Get.
1595
+ folder_name = os_basename(self.path)
1596
+
1597
+ return folder_name
1598
+
1599
+
1600
+ @property
1601
+ def dir(self) -> str:
1602
+ """
1603
+ Return folder directory.
1604
+
1605
+ Returns
1606
+ -------
1607
+ Folder directory.
1608
+ """
1609
+
1610
+ # Get.
1611
+ folder_dir = os_dirname(self.path)
1612
+
1613
+ return folder_dir
1614
+
1615
+
1616
@property
def drive(self) -> str:
    """
    Drive part of the folder path.

    Returns
    -------
    Folder drive letter.
    """

    # Only the drive part of the split is wanted.
    return os_splitdrive(self.path)[0]
1630
+
1631
+
1632
@property
def size(self) -> int:
    """
    Total byte size of every file in the folder, sub folders included.

    Returns
    -------
    Folder byte size.
    """

    # Sum the size of each file found by the recursive listing.
    return sum(
        os_getsize(file_path)
        for file_path in self.paths('file', True)
    )
1651
+
1652
+
1653
@property
def ctime(self) -> float:
    """
    Folder create timestamp, as reported by `os.path.getctime`.

    Returns
    -------
    Folder create timestamp.
    """

    return os_getctime(self.path)
1667
+
1668
+
1669
@property
def mtime(self) -> float:
    """
    Folder modify timestamp, as reported by `os.path.getmtime`.

    Returns
    -------
    Folder modify timestamp.
    """

    return os_getmtime(self.path)
1683
+
1684
+
1685
@property
def atime(self) -> float:
    """
    Folder access timestamp, as reported by `os.path.getatime`.

    Returns
    -------
    Folder access timestamp.
    """

    return os_getatime(self.path)
1699
+
1700
+
1701
def __bool__(self) -> bool:
    """
    Judge if the folder path is an existing directory.

    Returns
    -------
    Judge result.
    """

    return os_isdir(self.path)
1714
+
1715
+
1716
def __len__(self) -> int:
    """
    Folder byte size, same value as the `size` attribute.

    Returns
    -------
    Folder byte size.
    """

    return self.size
1729
+
1730
+
1731
def __contains__(self, pattern: str) -> bool:
    """
    Judge if any file name matches the pattern, recursion directory.

    Parameters
    ----------
    pattern : Match file name pattern.

    Returns
    -------
    Judge result.
    """

    # A first-match search returning a path means at least one file matched.
    return self.search(pattern, True) is not None
1749
+
1750
+
1751
def __del__(self) -> None:
    """
    Close temporary folder.

    Does nothing when the instance has no `folder` attribute, which happens
    when the finalizer runs on an instance whose construction did not finish.
    """

    # Close, the attribute may be missing on a partially constructed instance.
    folder = getattr(self, 'folder', None)
    if folder is not None:
        folder.cleanup()
1758
+
1759
+
1760
# Calling the instance is an alias of the `paths` method.
__call__ = paths
1761
+
1762
+
1763
def doc_to_docx(
    path: str,
    save_path: Optional[str] = None
) -> str:
    """
    Convert `DOC` file to `DOCX` file, through the Word application COM interface.

    Parameters
    ----------
    path : DOC file path.
    save_path : DOCX save file path.
        - `None`: DOC file directory, same file name with the suffix replaced by `.docx`.

    Returns
    -------
    DOCX file path.
    """

    # Imported here, the COM bindings are only needed by this function.
    from win32com.client import Dispatch, CDispatch


    # Handle parameter.
    if save_path is None:

        ## Replace only the trailing suffix, a pattern search over the whole
        ## path would also rewrite directory names that contain "doc".
        stem, _ = os_splitext(path.replace('\\', '/'))
        save_path = stem + '.docx'

    # Convert.
    # NOTE(review): the Word application is left running after the call, as before - confirm whether it should be quit.
    cdispatch = Dispatch('Word.Application')

    ## Absolute paths, so Word does not resolve them against its own working directory.
    document: CDispatch = cdispatch.Documents.Open(os_abspath(path))
    try:

        ## Save format 16 is presumably the Word default document format (DOCX) - verify against the WdSaveFormat enumeration.
        document.SaveAs(os_abspath(save_path), 16)

    ## Close the document even when saving fails.
    finally:
        document.Close()

    return save_path
1801
+
1802
+
1803
def extract_docx_content(path: str) -> str:
    """
    Extract content from `DOCX` file.

    Paragraphs and tables are read in document body order. Each table becomes
    one line per row, with its non-empty cell texts joined by ` | `.

    Parameters
    ----------
    path : File path.

    Returns
    -------
    Content.
    """

    # Load.
    document: Document = docx_document(path)
    body_children: ElementChildIterator = document.element.body.iterchildren()

    # Extract.
    blocks = []
    for element in body_children:

        ## Text.
        if isinstance(element, CT_P):
            blocks.append(Paragraph(element, document).text)

        ## Table.
        elif isinstance(element, CT_Tbl):
            row_lines = []
            for row in Table(element, document).rows:
                cell_texts = []
                for cell in row.cells:
                    cell_text = cell.text

                    ### Skip cells without any visible text.
                    if cell_text is None:
                        continue
                    cell_text = cell_text.strip()
                    if cell_text == '':
                        continue

                    cell_texts.append(cell_text.replace('\n', ' '))
                row_lines.append(' | '.join(cell_texts))

            ### Surround the table with line breaks.
            blocks.append('\n%s\n' % '\n'.join(row_lines))

    # Join.
    return '\n'.join(blocks)
1853
+
1854
+
1855
def extract_pdf_content(path: str) -> str:
    """
    Extract content from `PDF` file.

    Parameters
    ----------
    path : File path.

    Returns
    -------
    Content, the text of every page joined by line breaks.
    """

    # Extract, the file is closed even when a page fails to extract.
    with pdfplumber_open(path) as document:
        contents = [

            ## A page without extractable text contributes an empty string.
            page.extract_text() or ''
            for page in document.pages
        ]

    # Join.
    content = '\n'.join(contents)

    return content
1880
+
1881
+
1882
def extract_file_content(path: str) -> str:
    """
    Extract content from `DOC` or `DOCX` or `PDF` file, chosen by the file suffix.

    Parameters
    ----------
    path : File path.

    Returns
    -------
    Content.
    """

    # Handle parameter.
    suffix = os_splitext(path)[1].lower()

    ## A DOC file is converted first, then handled as DOCX.
    if suffix == '.doc':
        path = doc_to_docx(path)
        suffix = '.docx'

    # Extract.

    ## DOCX.
    if suffix == '.docx':
        content = extract_docx_content(path)

    ## PDF.
    elif suffix == '.pdf':
        content = extract_pdf_content(path)

    ## Throw exception.
    else:
        throw(value=suffix)

    return content