ScriptCollection 4.2.72__py3-none-any.whl → 4.2.74__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ScriptCollection/Executables.py +3 -6
- ScriptCollection/ScriptCollectionCore.py +54 -25
- {scriptcollection-4.2.72.dist-info → scriptcollection-4.2.74.dist-info}/METADATA +1 -1
- {scriptcollection-4.2.72.dist-info → scriptcollection-4.2.74.dist-info}/RECORD +7 -7
- {scriptcollection-4.2.72.dist-info → scriptcollection-4.2.74.dist-info}/WHEEL +0 -0
- {scriptcollection-4.2.72.dist-info → scriptcollection-4.2.74.dist-info}/entry_points.txt +0 -0
- {scriptcollection-4.2.72.dist-info → scriptcollection-4.2.74.dist-info}/top_level.txt +0 -0
ScriptCollection/Executables.py
CHANGED
|
@@ -706,14 +706,13 @@ def OCRAnalysisOfFolder() -> int:
|
|
|
706
706
|
parser.add_argument('-e', '--extensions', required=False, default="pdf,docx,jpg,png,xlsx")
|
|
707
707
|
parser.add_argument('-l', '--languages', required=False, default="eng")
|
|
708
708
|
parser.add_argument('-f', '--folder', required=False, default=None)
|
|
709
|
-
parser.add_argument('-d', '--datafolder', required=False, default=None)
|
|
710
709
|
args = parser.parse_args()
|
|
711
710
|
sc = ScriptCollectionCore()
|
|
712
711
|
if args.folder is None:
|
|
713
712
|
args.folder = os.getcwd()
|
|
714
713
|
languages=args.languages.split(",")
|
|
715
714
|
extensions=args.extensions.split(",")
|
|
716
|
-
sc.ocr_analysis_of_folder(args.folder, args.serviceaddress, extensions, languages,args.
|
|
715
|
+
sc.ocr_analysis_of_folder(args.folder, args.serviceaddress, extensions, languages,args.folder,[])
|
|
717
716
|
return 0
|
|
718
717
|
|
|
719
718
|
|
|
@@ -722,11 +721,10 @@ def OCRAnalysisOfFile() -> int:
|
|
|
722
721
|
parser.add_argument('-s', '--serviceaddress', required=False, default=None)
|
|
723
722
|
parser.add_argument('-l', '--languages', required=False, default="eng")
|
|
724
723
|
parser.add_argument('-f', '--file', required=True)
|
|
725
|
-
parser.add_argument('-d', '--datafolder', required=False, default=None)
|
|
726
724
|
args = parser.parse_args()
|
|
727
725
|
sc = ScriptCollectionCore()
|
|
728
726
|
languages=args.languages.split(",")
|
|
729
|
-
sc.ocr_analysis_of_file(args.file, args.serviceaddress, languages,
|
|
727
|
+
sc.ocr_analysis_of_file(args.file, args.serviceaddress, languages,".")
|
|
730
728
|
return 0
|
|
731
729
|
|
|
732
730
|
|
|
@@ -736,14 +734,13 @@ def OCRAnalysisOfRepository() -> int:
|
|
|
736
734
|
parser.add_argument('-e', '--extensions', required=False, default="pdf,docx,jpg,png,xlsx")
|
|
737
735
|
parser.add_argument('-l', '--languages', required=False, default="eng")
|
|
738
736
|
parser.add_argument('-f', '--folder', required=False, default=None)
|
|
739
|
-
parser.add_argument('-d', '--datafolder', required=False, default=None)
|
|
740
737
|
args = parser.parse_args()
|
|
741
738
|
sc = ScriptCollectionCore()
|
|
742
739
|
if args.folder is None:
|
|
743
740
|
args.folder = os.getcwd()
|
|
744
741
|
languages=args.languages.split(",")
|
|
745
742
|
extensions=args.extensions.split(",")
|
|
746
|
-
sc.ocr_analysis_of_repository(args.folder, args.serviceaddress, extensions, languages
|
|
743
|
+
sc.ocr_analysis_of_repository(args.folder, args.serviceaddress, extensions, languages)
|
|
747
744
|
return 0
|
|
748
745
|
|
|
749
746
|
|
|
ScriptCollection/ScriptCollectionCore.py
CHANGED
|
@@ -37,7 +37,7 @@ from .ProgramRunnerBase import ProgramRunnerBase
|
|
|
37
37
|
from .ProgramRunnerPopen import ProgramRunnerPopen
|
|
38
38
|
from .SCLog import SCLog, LogLevel
|
|
39
39
|
|
|
40
|
-
version = "4.2.72"
|
|
40
|
+
version = "4.2.74"
|
|
41
41
|
__version__ = version
|
|
42
42
|
|
|
43
43
|
class VSCodeWorkspaceShellTask:
|
|
@@ -2674,7 +2674,14 @@ TXDX
|
|
|
2674
2674
|
self.run_program_argsasarray("pip", arguments, folder,print_live_output=self.log.loglevel==LogLevel.Debug)
|
|
2675
2675
|
|
|
2676
2676
|
@GeneralUtilities.check_arguments
|
|
2677
|
-
def
|
|
2677
|
+
def ocr_analysis_of_folder_using_local_docker_image(self, folder: str, extensions: list[str], languages: list[str],base_folder_for_entry: str,ignore_pattern:list[str] ) -> list[str]: # Returns a list of changed files due to ocr-analysis.
|
|
2678
|
+
#TODO start docker server
|
|
2679
|
+
serviceaddress:str=None#TODO
|
|
2680
|
+
self.ocr_analysis_of_folder(folder, serviceaddress, extensions, languages, base_folder_for_entry,ignore_pattern)
|
|
2681
|
+
#TODO stop docker server
|
|
2682
|
+
|
|
2683
|
+
@GeneralUtilities.check_arguments
|
|
2684
|
+
def ocr_analysis_of_folder(self, folder: str, serviceaddress: str, extensions: list[str], languages: list[str],base_folder_for_entry: str,ignore_pattern:list[str] ) -> list[str]: # Returns a list of changed files due to ocr-analysis.
|
|
2678
2685
|
supported_extensions = ['png', 'jpg', 'jpeg', 'tiff', 'bmp', 'gif', 'pdf', 'docx', 'doc', 'xlsx', 'xls', 'pptx', 'ppt']
|
|
2679
2686
|
changes_files: list[str] = []
|
|
2680
2687
|
if base_folder_for_entry is None:
|
|
@@ -2687,13 +2694,13 @@ TXDX
|
|
|
2687
2694
|
file_lower = file.lower()
|
|
2688
2695
|
for extension in extensions:
|
|
2689
2696
|
if file_lower.endswith("."+extension):
|
|
2690
|
-
if self.ocr_analysis_of_file(file, serviceaddress, languages,
|
|
2697
|
+
if self.ocr_analysis_of_file(file, serviceaddress, languages,base_folder_for_entry):
|
|
2691
2698
|
changes_files.append(file)
|
|
2692
2699
|
break
|
|
2693
2700
|
for subfolder in GeneralUtilities.get_direct_folders_of_folder(folder):
|
|
2694
2701
|
if GeneralUtilities.is_ignored_by_glob_pattern(os.path.dirname(subfolder),subfolder,ignore_pattern):
|
|
2695
2702
|
continue
|
|
2696
|
-
for file in self.ocr_analysis_of_folder(subfolder, serviceaddress, extensions, languages,
|
|
2703
|
+
for file in self.ocr_analysis_of_folder(subfolder, serviceaddress, extensions, languages,base_folder_for_entry+"/"+os.path.basename(subfolder), ignore_pattern):
|
|
2697
2704
|
changes_files.append(file)
|
|
2698
2705
|
return changes_files
|
|
2699
2706
|
|
|
@@ -2707,8 +2714,7 @@ TXDX
|
|
|
2707
2714
|
return False
|
|
2708
2715
|
|
|
2709
2716
|
@GeneralUtilities.check_arguments
|
|
2710
|
-
def ocr_analysis_of_file(self, file: str, serviceaddress: str, languages: list[str],
|
|
2711
|
-
GeneralUtilities.write_message_to_stdout(f"Starting OCR analysis of file {file}...")
|
|
2717
|
+
def ocr_analysis_of_file(self, file: str, serviceaddress: str, languages: list[str], readable_folder_entry:str ) -> bool: # Returns true if the ocr-file was generated or updated. Returns false if the existing ocr-file was not changed.
|
|
2712
2718
|
supported_extensions = ['png', 'jpg', 'jpeg', 'tiff', 'bmp', 'webp', 'gif', 'pdf', 'rtf', 'docx', 'doc', 'odt', 'xlsx', 'xls', 'ods', 'pptx', 'ppt', 'odp']
|
|
2713
2719
|
if not self.__it_supported_extension(file, supported_extensions):
|
|
2714
2720
|
raise ValueError(f"File '{file}' is not supported due to unsupported extension. Supported extensions are: {', '.join(supported_extensions)}")
|
|
@@ -2722,7 +2728,8 @@ TXDX
|
|
|
2722
2728
|
return False
|
|
2723
2729
|
except:
|
|
2724
2730
|
pass
|
|
2725
|
-
|
|
2731
|
+
GeneralUtilities.write_message_to_stdout(f"Starting OCR-analysis of file \"{file}\"...")
|
|
2732
|
+
ocr_content = self.get_ocr_content_of_file(file, serviceaddress, languages)
|
|
2726
2733
|
GeneralUtilities.ensure_file_exists(target_file)
|
|
2727
2734
|
if readable_folder_entry is None:
|
|
2728
2735
|
readable_folder_entry="."
|
|
@@ -2734,30 +2741,52 @@ OCR-content:
|
|
|
2734
2741
|
return True
|
|
2735
2742
|
|
|
2736
2743
|
@GeneralUtilities.check_arguments
|
|
2737
|
-
def get_ocr_content_of_file(self, file: str, serviceaddress: str, languages: list[str]
|
|
2744
|
+
def get_ocr_content_of_file(self, file: str, serviceaddress: str, languages: list[str]) -> str:
|
|
2738
2745
|
result: str = None
|
|
2739
|
-
extension = Path(file).suffix
|
|
2746
|
+
extension = Path(file).suffix[1:]
|
|
2747
|
+
mime_types = {
|
|
2748
|
+
"pdf": "application/pdf",
|
|
2749
|
+
"png": "image/png",
|
|
2750
|
+
"jpg": "image/jpeg",
|
|
2751
|
+
"jpeg": "image/jpeg",
|
|
2752
|
+
"txt": "text/plain",
|
|
2753
|
+
"json": "application/json",
|
|
2754
|
+
"doc": "application/msword",
|
|
2755
|
+
"docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
2756
|
+
"xls": "application/vnd.ms-excel",
|
|
2757
|
+
"xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
2758
|
+
}
|
|
2740
2759
|
if serviceaddress is None:
|
|
2741
|
-
|
|
2742
|
-
if
|
|
2743
|
-
|
|
2744
|
-
|
|
2745
|
-
|
|
2746
|
-
|
|
2747
|
-
|
|
2748
|
-
|
|
2749
|
-
|
|
2750
|
-
|
|
2751
|
-
|
|
2760
|
+
server_url_file:str= GeneralUtilities.normalize_path(f"{str(Path.home())}/.ScriptCollection/OCR/ServiceURL.txt")
|
|
2761
|
+
if os.path.isfile(server_url_file):
|
|
2762
|
+
for line in GeneralUtilities.read_nonempty_lines_from_file(server_url_file):
|
|
2763
|
+
if not line.startswith("#"):
|
|
2764
|
+
serviceaddress = line.strip()
|
|
2765
|
+
break
|
|
2766
|
+
GeneralUtilities.assert_not_null(serviceaddress, "ocr-service-address must not be null.")
|
|
2767
|
+
mime_type = mime_types.get(extension.lower(), "application/octet-stream")
|
|
2768
|
+
service_url: str = f"{serviceaddress}/API/v1/SimpleOCR/GetOCRContent?mimeType={mime_type}"
|
|
2769
|
+
for language in languages:
|
|
2770
|
+
service_url = service_url + f"&languages={language}"
|
|
2771
|
+
headers = {'Cache-Control': 'no-cache'}
|
|
2772
|
+
with open(file, "rb") as f:
|
|
2773
|
+
files_to_analyse = {
|
|
2774
|
+
"fileContent": (os.path.basename(file), f, mime_type)
|
|
2775
|
+
}
|
|
2776
|
+
r = requests.put(service_url, timeout=3600, headers=headers, files=files_to_analyse,verify=True)
|
|
2752
2777
|
if r.status_code != 200:
|
|
2753
|
-
|
|
2778
|
+
if r.status_code == 400:
|
|
2779
|
+
return f"Could not calculate ocr-content for file \"{file}\". File may be broken."
|
|
2780
|
+
else:
|
|
2781
|
+
raise ValueError(f"Retrieving ocr-content for file \"{file}\" resulted in HTTP-response-code {r.status_code}.")
|
|
2782
|
+
|
|
2754
2783
|
result = GeneralUtilities.bytes_to_string(r.content)
|
|
2755
2784
|
return result
|
|
2756
2785
|
|
|
2757
2786
|
@GeneralUtilities.check_arguments
|
|
2758
|
-
def ocr_analysis_of_repository(self, folder: str, serviceaddress: str, extensions: list[str], languages: list[str]
|
|
2787
|
+
def ocr_analysis_of_repository(self, folder: str, serviceaddress: str, extensions: list[str], languages: list[str]) -> None:
|
|
2759
2788
|
self.assert_is_git_repository(folder)
|
|
2760
|
-
self.ocr_analysis_of_folder(folder, serviceaddress, extensions, languages,
|
|
2789
|
+
self.ocr_analysis_of_folder(folder, serviceaddress, extensions, languages,".",[".git"])
|
|
2761
2790
|
|
|
2762
2791
|
@GeneralUtilities.check_arguments
|
|
2763
2792
|
def update_timestamp_in_file(self, target_file: str) -> None:
|
|
@@ -3299,7 +3328,7 @@ OCR-content:
|
|
|
3299
3328
|
if os.path.isabs(target_file):
|
|
3300
3329
|
target_file=GeneralUtilities.resolve_relative_path(target_file,repository_folder)
|
|
3301
3330
|
target_file=GeneralUtilities.normalize_path(target_file)
|
|
3302
|
-
files=self.get_all_files_in_git_repository(repository_folder,ignore_ignored_files,include_submodules)
|
|
3331
|
+
files=[path.replace("\\","/") for path in self.get_all_files_in_git_repository(repository_folder,ignore_ignored_files,include_submodules)]
|
|
3303
3332
|
GeneralUtilities.ensure_file_exists(target_file)
|
|
3304
3333
|
GeneralUtilities.write_lines_to_file(target_file, files)
|
|
3305
3334
|
|
|
@@ -3322,7 +3351,7 @@ OCR-content:
|
|
|
3322
3351
|
GeneralUtilities.ensure_file_exists(target_file)
|
|
3323
3352
|
GeneralUtilities.write_lines_to_file(target_file, commits)
|
|
3324
3353
|
|
|
3325
|
-
|
|
3326
3354
|
@GeneralUtilities.check_arguments
|
|
3327
3355
|
def is_runnning_in_container(self) ->bool:
|
|
3356
|
+
"""this function is based on a convention and does not do a real check."""
|
|
3328
3357
|
return os.environ.get("ISRUNNINGINCONTAINER") == "true"
|
|
{scriptcollection-4.2.72.dist-info → scriptcollection-4.2.74.dist-info}/RECORD
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
ScriptCollection/AnionBuildPlatform.py,sha256=K-PHarX802A0PU8uRu0GNcEZiXujFoXHACe-X9YJsAQ,11711
|
|
2
2
|
ScriptCollection/CertificateUpdater.py,sha256=Pa6eyjQSx7IIvj4PQVMI0IwMs01KQrNSB7Qa-7lRfBs,9375
|
|
3
|
-
ScriptCollection/Executables.py,sha256=
|
|
3
|
+
ScriptCollection/Executables.py,sha256=SsA3zeDL8QEsh7GxjDjatv5P4eFeDBPqB0F8pNXoYzA,44234
|
|
4
4
|
ScriptCollection/GeneralUtilities.py,sha256=3Fgp0fAXF-rfcohy6k1RsRcMXEVRF15fHl8QJnViKIg,65497
|
|
5
5
|
ScriptCollection/HTTPMaintenanceOverheadHelper.py,sha256=TToNtyO1XzsMbBsTBf3o0xgOK0v4Jf03qw2Z0xb2nCk,2007
|
|
6
6
|
ScriptCollection/ProcessesRunner.py,sha256=o5raxIt3lknNPoPrjNzJ2bprRPJ3SnL0rrR7crraD7E,1523
|
|
@@ -9,7 +9,7 @@ ScriptCollection/ProgramRunnerMock.py,sha256=uTu-aFle1W_oKjeQEmuPsFPQpvo0kRf2FrR
|
|
|
9
9
|
ScriptCollection/ProgramRunnerPopen.py,sha256=BPY7-ZMIlqT7JOKz8qlB5c0laF2Js-ijzqk09GxZC48,3821
|
|
10
10
|
ScriptCollection/ProgramRunnerSudo.py,sha256=_khC3xuTdrPoLluBJZWfldltmmuKltABJPcbjZSFW-4,4835
|
|
11
11
|
ScriptCollection/SCLog.py,sha256=8TRy1LeYMsPOIuWUcnUNNbO5pd-cNBS-3cn-kdzP8FU,4768
|
|
12
|
-
ScriptCollection/ScriptCollectionCore.py,sha256=
|
|
12
|
+
ScriptCollection/ScriptCollectionCore.py,sha256=5RXTdMUUCb1XvzPEj8JTqQosPQ4YgRufsbXpY6ZEHa8,181964
|
|
13
13
|
ScriptCollection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
14
|
ScriptCollection/OCIImages/AbstractImageHandler.py,sha256=83qDMILwxhH9DbC0sb358Vu8PXEysmJJyap_6gECZqs,1627
|
|
15
15
|
ScriptCollection/OCIImages/OCIImageManager.py,sha256=aBogkSXNDyi8NO11N-s03nuFJEv7PyJ-wjHuYYeZfvs,6662
|
|
@@ -47,8 +47,8 @@ ScriptCollection/TFCPS/NodeJS/TFCPS_CodeUnitSpecific_NodeJS.py,sha256=GQLE6FeR-X
|
|
|
47
47
|
ScriptCollection/TFCPS/NodeJS/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
48
48
|
ScriptCollection/TFCPS/Python/TFCPS_CodeUnitSpecific_Python.py,sha256=9XK7XnbeOnq_4siVoWovogStoKFiZLhGh3C_f2YaznI,13621
|
|
49
49
|
ScriptCollection/TFCPS/Python/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
50
|
-
scriptcollection-4.2.
|
|
51
|
-
scriptcollection-4.2.
|
|
52
|
-
scriptcollection-4.2.
|
|
53
|
-
scriptcollection-4.2.
|
|
54
|
-
scriptcollection-4.2.
|
|
50
|
+
scriptcollection-4.2.74.dist-info/METADATA,sha256=X82BuSGCRx0eK2pnFKs2wEhh7_xViAovRmeiZpqitfc,7691
|
|
51
|
+
scriptcollection-4.2.74.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
52
|
+
scriptcollection-4.2.74.dist-info/entry_points.txt,sha256=27XwAJEcaMEc1be0Ec1vKHCbiU4Ziu8jKL-SqsrYOIQ,4680
|
|
53
|
+
scriptcollection-4.2.74.dist-info/top_level.txt,sha256=hY2hOVH0V0Ce51WB76zKkIWTUNwMUdHo4XDkR2vYVwg,17
|
|
54
|
+
scriptcollection-4.2.74.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|