ScriptCollection 4.2.72__py3-none-any.whl → 4.2.73__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ScriptCollection/Executables.py +3 -6
- ScriptCollection/ScriptCollectionCore.py +45 -24
- {scriptcollection-4.2.72.dist-info → scriptcollection-4.2.73.dist-info}/METADATA +1 -1
- {scriptcollection-4.2.72.dist-info → scriptcollection-4.2.73.dist-info}/RECORD +7 -7
- {scriptcollection-4.2.72.dist-info → scriptcollection-4.2.73.dist-info}/WHEEL +0 -0
- {scriptcollection-4.2.72.dist-info → scriptcollection-4.2.73.dist-info}/entry_points.txt +0 -0
- {scriptcollection-4.2.72.dist-info → scriptcollection-4.2.73.dist-info}/top_level.txt +0 -0
ScriptCollection/Executables.py
CHANGED
|
@@ -706,14 +706,13 @@ def OCRAnalysisOfFolder() -> int:
|
|
|
706
706
|
parser.add_argument('-e', '--extensions', required=False, default="pdf,docx,jpg,png,xlsx")
|
|
707
707
|
parser.add_argument('-l', '--languages', required=False, default="eng")
|
|
708
708
|
parser.add_argument('-f', '--folder', required=False, default=None)
|
|
709
|
-
parser.add_argument('-d', '--datafolder', required=False, default=None)
|
|
710
709
|
args = parser.parse_args()
|
|
711
710
|
sc = ScriptCollectionCore()
|
|
712
711
|
if args.folder is None:
|
|
713
712
|
args.folder = os.getcwd()
|
|
714
713
|
languages=args.languages.split(",")
|
|
715
714
|
extensions=args.extensions.split(",")
|
|
716
|
-
sc.ocr_analysis_of_folder(args.folder, args.serviceaddress, extensions, languages,args.
|
|
715
|
+
sc.ocr_analysis_of_folder(args.folder, args.serviceaddress, extensions, languages,args.folder,[])
|
|
717
716
|
return 0
|
|
718
717
|
|
|
719
718
|
|
|
@@ -722,11 +721,10 @@ def OCRAnalysisOfFile() -> int:
|
|
|
722
721
|
parser.add_argument('-s', '--serviceaddress', required=False, default=None)
|
|
723
722
|
parser.add_argument('-l', '--languages', required=False, default="eng")
|
|
724
723
|
parser.add_argument('-f', '--file', required=True)
|
|
725
|
-
parser.add_argument('-d', '--datafolder', required=False, default=None)
|
|
726
724
|
args = parser.parse_args()
|
|
727
725
|
sc = ScriptCollectionCore()
|
|
728
726
|
languages=args.languages.split(",")
|
|
729
|
-
sc.ocr_analysis_of_file(args.file, args.serviceaddress, languages,
|
|
727
|
+
sc.ocr_analysis_of_file(args.file, args.serviceaddress, languages,".")
|
|
730
728
|
return 0
|
|
731
729
|
|
|
732
730
|
|
|
@@ -736,14 +734,13 @@ def OCRAnalysisOfRepository() -> int:
|
|
|
736
734
|
parser.add_argument('-e', '--extensions', required=False, default="pdf,docx,jpg,png,xlsx")
|
|
737
735
|
parser.add_argument('-l', '--languages', required=False, default="eng")
|
|
738
736
|
parser.add_argument('-f', '--folder', required=False, default=None)
|
|
739
|
-
parser.add_argument('-d', '--datafolder', required=False, default=None)
|
|
740
737
|
args = parser.parse_args()
|
|
741
738
|
sc = ScriptCollectionCore()
|
|
742
739
|
if args.folder is None:
|
|
743
740
|
args.folder = os.getcwd()
|
|
744
741
|
languages=args.languages.split(",")
|
|
745
742
|
extensions=args.extensions.split(",")
|
|
746
|
-
sc.ocr_analysis_of_repository(args.folder, args.serviceaddress, extensions, languages
|
|
743
|
+
sc.ocr_analysis_of_repository(args.folder, args.serviceaddress, extensions, languages)
|
|
747
744
|
return 0
|
|
748
745
|
|
|
749
746
|
|
|
@@ -37,7 +37,7 @@ from .ProgramRunnerBase import ProgramRunnerBase
|
|
|
37
37
|
from .ProgramRunnerPopen import ProgramRunnerPopen
|
|
38
38
|
from .SCLog import SCLog, LogLevel
|
|
39
39
|
|
|
40
|
-
version = "4.2.
|
|
40
|
+
version = "4.2.73"
|
|
41
41
|
__version__ = version
|
|
42
42
|
|
|
43
43
|
class VSCodeWorkspaceShellTask:
|
|
@@ -2674,7 +2674,14 @@ TXDX
|
|
|
2674
2674
|
self.run_program_argsasarray("pip", arguments, folder,print_live_output=self.log.loglevel==LogLevel.Debug)
|
|
2675
2675
|
|
|
2676
2676
|
@GeneralUtilities.check_arguments
|
|
2677
|
-
def
|
|
2677
|
+
def ocr_analysis_of_folder_using_local_docker_image(self, folder: str, extensions: list[str], languages: list[str],base_folder_for_entry: str,ignore_pattern:list[str] ) -> list[str]: # Returns a list of changed files due to ocr-analysis.
|
|
2678
|
+
#TODO start docker server
|
|
2679
|
+
serviceaddress:str=None#TODO
|
|
2680
|
+
self.ocr_analysis_of_folder(folder, serviceaddress, extensions, languages, base_folder_for_entry,ignore_pattern)
|
|
2681
|
+
#TODO stop docker server
|
|
2682
|
+
|
|
2683
|
+
@GeneralUtilities.check_arguments
|
|
2684
|
+
def ocr_analysis_of_folder(self, folder: str, serviceaddress: str, extensions: list[str], languages: list[str],base_folder_for_entry: str,ignore_pattern:list[str] ) -> list[str]: # Returns a list of changed files due to ocr-analysis.
|
|
2678
2685
|
supported_extensions = ['png', 'jpg', 'jpeg', 'tiff', 'bmp', 'gif', 'pdf', 'docx', 'doc', 'xlsx', 'xls', 'pptx', 'ppt']
|
|
2679
2686
|
changes_files: list[str] = []
|
|
2680
2687
|
if base_folder_for_entry is None:
|
|
@@ -2687,13 +2694,13 @@ TXDX
|
|
|
2687
2694
|
file_lower = file.lower()
|
|
2688
2695
|
for extension in extensions:
|
|
2689
2696
|
if file_lower.endswith("."+extension):
|
|
2690
|
-
if self.ocr_analysis_of_file(file, serviceaddress, languages,
|
|
2697
|
+
if self.ocr_analysis_of_file(file, serviceaddress, languages,base_folder_for_entry):
|
|
2691
2698
|
changes_files.append(file)
|
|
2692
2699
|
break
|
|
2693
2700
|
for subfolder in GeneralUtilities.get_direct_folders_of_folder(folder):
|
|
2694
2701
|
if GeneralUtilities.is_ignored_by_glob_pattern(os.path.dirname(subfolder),subfolder,ignore_pattern):
|
|
2695
2702
|
continue
|
|
2696
|
-
for file in self.ocr_analysis_of_folder(subfolder, serviceaddress, extensions, languages,
|
|
2703
|
+
for file in self.ocr_analysis_of_folder(subfolder, serviceaddress, extensions, languages,base_folder_for_entry+"/"+os.path.basename(subfolder), ignore_pattern):
|
|
2697
2704
|
changes_files.append(file)
|
|
2698
2705
|
return changes_files
|
|
2699
2706
|
|
|
@@ -2707,8 +2714,7 @@ TXDX
|
|
|
2707
2714
|
return False
|
|
2708
2715
|
|
|
2709
2716
|
@GeneralUtilities.check_arguments
|
|
2710
|
-
def ocr_analysis_of_file(self, file: str, serviceaddress: str, languages: list[str],
|
|
2711
|
-
GeneralUtilities.write_message_to_stdout(f"Starting OCR analysis of file {file}...")
|
|
2717
|
+
def ocr_analysis_of_file(self, file: str, serviceaddress: str, languages: list[str], readable_folder_entry:str ) -> bool: # Returns true if the ocr-file was generated or updated. Returns false if the existing ocr-file was not changed.
|
|
2712
2718
|
supported_extensions = ['png', 'jpg', 'jpeg', 'tiff', 'bmp', 'webp', 'gif', 'pdf', 'rtf', 'docx', 'doc', 'odt', 'xlsx', 'xls', 'ods', 'pptx', 'ppt', 'odp']
|
|
2713
2719
|
if not self.__it_supported_extension(file, supported_extensions):
|
|
2714
2720
|
raise ValueError(f"File '{file}' is not supported due to unsupported extension. Supported extensions are: {', '.join(supported_extensions)}")
|
|
@@ -2722,7 +2728,8 @@ TXDX
|
|
|
2722
2728
|
return False
|
|
2723
2729
|
except:
|
|
2724
2730
|
pass
|
|
2725
|
-
|
|
2731
|
+
GeneralUtilities.write_message_to_stdout(f"Starting OCR-analysis of file \"{file}\"...")
|
|
2732
|
+
ocr_content = self.get_ocr_content_of_file(file, serviceaddress, languages)
|
|
2726
2733
|
GeneralUtilities.ensure_file_exists(target_file)
|
|
2727
2734
|
if readable_folder_entry is None:
|
|
2728
2735
|
readable_folder_entry="."
|
|
@@ -2734,30 +2741,44 @@ OCR-content:
|
|
|
2734
2741
|
return True
|
|
2735
2742
|
|
|
2736
2743
|
@GeneralUtilities.check_arguments
|
|
2737
|
-
def get_ocr_content_of_file(self, file: str, serviceaddress: str, languages: list[str]
|
|
2744
|
+
def get_ocr_content_of_file(self, file: str, serviceaddress: str, languages: list[str]) -> str:
|
|
2738
2745
|
result: str = None
|
|
2739
|
-
extension = Path(file).suffix
|
|
2746
|
+
extension = Path(file).suffix[1:]
|
|
2747
|
+
mime_types = {
|
|
2748
|
+
"pdf": "application/pdf",
|
|
2749
|
+
"png": "image/png",
|
|
2750
|
+
"jpg": "image/jpeg",
|
|
2751
|
+
"jpeg": "image/jpeg",
|
|
2752
|
+
"txt": "text/plain",
|
|
2753
|
+
"json": "application/json",
|
|
2754
|
+
}
|
|
2740
2755
|
if serviceaddress is None:
|
|
2741
|
-
|
|
2742
|
-
if
|
|
2743
|
-
|
|
2744
|
-
|
|
2745
|
-
|
|
2746
|
-
|
|
2747
|
-
|
|
2748
|
-
|
|
2749
|
-
|
|
2750
|
-
|
|
2751
|
-
|
|
2756
|
+
server_url_file:str= GeneralUtilities.normalize_path(f"{str(Path.home())}/.ScriptCollection/OCR/ServiceURL.txt")
|
|
2757
|
+
if os.path.isfile(server_url_file):
|
|
2758
|
+
for line in GeneralUtilities.read_nonempty_lines_from_file(server_url_file):
|
|
2759
|
+
if not line.startswith("#"):
|
|
2760
|
+
serviceaddress = line.strip()
|
|
2761
|
+
break
|
|
2762
|
+
GeneralUtilities.assert_not_null(serviceaddress, "ocr-service-address must not be null.")
|
|
2763
|
+
mime_type = mime_types.get(extension.lower(), "application/octet-stream")
|
|
2764
|
+
service_url: str = f"{serviceaddress}/API/v1/SimpleOCR/GetOCRContent?mimeType={mime_type}"
|
|
2765
|
+
for language in languages:
|
|
2766
|
+
service_url = service_url + f"&languages={language}"
|
|
2767
|
+
headers = {'Cache-Control': 'no-cache'}
|
|
2768
|
+
with open(file, "rb") as f:
|
|
2769
|
+
files_to_analyse = {
|
|
2770
|
+
"fileContent": (os.path.basename(file), f, mime_type)
|
|
2771
|
+
}
|
|
2772
|
+
r = requests.put(service_url, timeout=600, headers=headers, files=files_to_analyse)
|
|
2752
2773
|
if r.status_code != 200:
|
|
2753
2774
|
raise ValueError(f"Checking for latest tor package resulted in HTTP-response-code {r.status_code}.")
|
|
2754
2775
|
result = GeneralUtilities.bytes_to_string(r.content)
|
|
2755
2776
|
return result
|
|
2756
2777
|
|
|
2757
2778
|
@GeneralUtilities.check_arguments
|
|
2758
|
-
def ocr_analysis_of_repository(self, folder: str, serviceaddress: str, extensions: list[str], languages: list[str]
|
|
2779
|
+
def ocr_analysis_of_repository(self, folder: str, serviceaddress: str, extensions: list[str], languages: list[str]) -> None:
|
|
2759
2780
|
self.assert_is_git_repository(folder)
|
|
2760
|
-
self.ocr_analysis_of_folder(folder, serviceaddress, extensions, languages,
|
|
2781
|
+
self.ocr_analysis_of_folder(folder, serviceaddress, extensions, languages,".",[".git"])
|
|
2761
2782
|
|
|
2762
2783
|
@GeneralUtilities.check_arguments
|
|
2763
2784
|
def update_timestamp_in_file(self, target_file: str) -> None:
|
|
@@ -3299,7 +3320,7 @@ OCR-content:
|
|
|
3299
3320
|
if os.path.isabs(target_file):
|
|
3300
3321
|
target_file=GeneralUtilities.resolve_relative_path(target_file,repository_folder)
|
|
3301
3322
|
target_file=GeneralUtilities.normalize_path(target_file)
|
|
3302
|
-
files=self.get_all_files_in_git_repository(repository_folder,ignore_ignored_files,include_submodules)
|
|
3323
|
+
files=[path.replace("\\","/") for path in self.get_all_files_in_git_repository(repository_folder,ignore_ignored_files,include_submodules)]
|
|
3303
3324
|
GeneralUtilities.ensure_file_exists(target_file)
|
|
3304
3325
|
GeneralUtilities.write_lines_to_file(target_file, files)
|
|
3305
3326
|
|
|
@@ -3322,7 +3343,7 @@ OCR-content:
|
|
|
3322
3343
|
GeneralUtilities.ensure_file_exists(target_file)
|
|
3323
3344
|
GeneralUtilities.write_lines_to_file(target_file, commits)
|
|
3324
3345
|
|
|
3325
|
-
|
|
3326
3346
|
@GeneralUtilities.check_arguments
|
|
3327
3347
|
def is_runnning_in_container(self) ->bool:
|
|
3348
|
+
"""this function is based on a convention and does not do a real check."""
|
|
3328
3349
|
return os.environ.get("ISRUNNINGINCONTAINER") == "true"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
ScriptCollection/AnionBuildPlatform.py,sha256=K-PHarX802A0PU8uRu0GNcEZiXujFoXHACe-X9YJsAQ,11711
|
|
2
2
|
ScriptCollection/CertificateUpdater.py,sha256=Pa6eyjQSx7IIvj4PQVMI0IwMs01KQrNSB7Qa-7lRfBs,9375
|
|
3
|
-
ScriptCollection/Executables.py,sha256=
|
|
3
|
+
ScriptCollection/Executables.py,sha256=SsA3zeDL8QEsh7GxjDjatv5P4eFeDBPqB0F8pNXoYzA,44234
|
|
4
4
|
ScriptCollection/GeneralUtilities.py,sha256=3Fgp0fAXF-rfcohy6k1RsRcMXEVRF15fHl8QJnViKIg,65497
|
|
5
5
|
ScriptCollection/HTTPMaintenanceOverheadHelper.py,sha256=TToNtyO1XzsMbBsTBf3o0xgOK0v4Jf03qw2Z0xb2nCk,2007
|
|
6
6
|
ScriptCollection/ProcessesRunner.py,sha256=o5raxIt3lknNPoPrjNzJ2bprRPJ3SnL0rrR7crraD7E,1523
|
|
@@ -9,7 +9,7 @@ ScriptCollection/ProgramRunnerMock.py,sha256=uTu-aFle1W_oKjeQEmuPsFPQpvo0kRf2FrR
|
|
|
9
9
|
ScriptCollection/ProgramRunnerPopen.py,sha256=BPY7-ZMIlqT7JOKz8qlB5c0laF2Js-ijzqk09GxZC48,3821
|
|
10
10
|
ScriptCollection/ProgramRunnerSudo.py,sha256=_khC3xuTdrPoLluBJZWfldltmmuKltABJPcbjZSFW-4,4835
|
|
11
11
|
ScriptCollection/SCLog.py,sha256=8TRy1LeYMsPOIuWUcnUNNbO5pd-cNBS-3cn-kdzP8FU,4768
|
|
12
|
-
ScriptCollection/ScriptCollectionCore.py,sha256=
|
|
12
|
+
ScriptCollection/ScriptCollectionCore.py,sha256=kqXL6UCd9VVpF7YQHYNjOediKZX0PIX2AMKEAXU09do,181498
|
|
13
13
|
ScriptCollection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
14
|
ScriptCollection/OCIImages/AbstractImageHandler.py,sha256=83qDMILwxhH9DbC0sb358Vu8PXEysmJJyap_6gECZqs,1627
|
|
15
15
|
ScriptCollection/OCIImages/OCIImageManager.py,sha256=aBogkSXNDyi8NO11N-s03nuFJEv7PyJ-wjHuYYeZfvs,6662
|
|
@@ -47,8 +47,8 @@ ScriptCollection/TFCPS/NodeJS/TFCPS_CodeUnitSpecific_NodeJS.py,sha256=GQLE6FeR-X
|
|
|
47
47
|
ScriptCollection/TFCPS/NodeJS/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
48
48
|
ScriptCollection/TFCPS/Python/TFCPS_CodeUnitSpecific_Python.py,sha256=9XK7XnbeOnq_4siVoWovogStoKFiZLhGh3C_f2YaznI,13621
|
|
49
49
|
ScriptCollection/TFCPS/Python/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
50
|
-
scriptcollection-4.2.
|
|
51
|
-
scriptcollection-4.2.
|
|
52
|
-
scriptcollection-4.2.
|
|
53
|
-
scriptcollection-4.2.
|
|
54
|
-
scriptcollection-4.2.
|
|
50
|
+
scriptcollection-4.2.73.dist-info/METADATA,sha256=P5a45qv1brID3Ot2mRgr2_qmrygBCaZhxPKdS_nCKx0,7691
|
|
51
|
+
scriptcollection-4.2.73.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
52
|
+
scriptcollection-4.2.73.dist-info/entry_points.txt,sha256=27XwAJEcaMEc1be0Ec1vKHCbiU4Ziu8jKL-SqsrYOIQ,4680
|
|
53
|
+
scriptcollection-4.2.73.dist-info/top_level.txt,sha256=hY2hOVH0V0Ce51WB76zKkIWTUNwMUdHo4XDkR2vYVwg,17
|
|
54
|
+
scriptcollection-4.2.73.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|