ScriptCollection 4.2.71__py3-none-any.whl → 4.2.73__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -706,14 +706,13 @@ def OCRAnalysisOfFolder() -> int:
706
706
  parser.add_argument('-e', '--extensions', required=False, default="pdf,docx,jpg,png,xlsx")
707
707
  parser.add_argument('-l', '--languages', required=False, default="eng")
708
708
  parser.add_argument('-f', '--folder', required=False, default=None)
709
- parser.add_argument('-d', '--datafolder', required=False, default=None)
710
709
  args = parser.parse_args()
711
710
  sc = ScriptCollectionCore()
712
711
  if args.folder is None:
713
712
  args.folder = os.getcwd()
714
713
  languages=args.languages.split(",")
715
714
  extensions=args.extensions.split(",")
716
- sc.ocr_analysis_of_folder(args.folder, args.serviceaddress, extensions, languages,args.datafolder,args.folder,[])
715
+ sc.ocr_analysis_of_folder(args.folder, args.serviceaddress, extensions, languages,args.folder,[])
717
716
  return 0
718
717
 
719
718
 
@@ -722,11 +721,10 @@ def OCRAnalysisOfFile() -> int:
722
721
  parser.add_argument('-s', '--serviceaddress', required=False, default=None)
723
722
  parser.add_argument('-l', '--languages', required=False, default="eng")
724
723
  parser.add_argument('-f', '--file', required=True)
725
- parser.add_argument('-d', '--datafolder', required=False, default=None)
726
724
  args = parser.parse_args()
727
725
  sc = ScriptCollectionCore()
728
726
  languages=args.languages.split(",")
729
- sc.ocr_analysis_of_file(args.file, args.serviceaddress, languages,args.datafolder,".")
727
+ sc.ocr_analysis_of_file(args.file, args.serviceaddress, languages,".")
730
728
  return 0
731
729
 
732
730
 
@@ -736,14 +734,13 @@ def OCRAnalysisOfRepository() -> int:
736
734
  parser.add_argument('-e', '--extensions', required=False, default="pdf,docx,jpg,png,xlsx")
737
735
  parser.add_argument('-l', '--languages', required=False, default="eng")
738
736
  parser.add_argument('-f', '--folder', required=False, default=None)
739
- parser.add_argument('-d', '--datafolder', required=False, default=None)
740
737
  args = parser.parse_args()
741
738
  sc = ScriptCollectionCore()
742
739
  if args.folder is None:
743
740
  args.folder = os.getcwd()
744
741
  languages=args.languages.split(",")
745
742
  extensions=args.extensions.split(",")
746
- sc.ocr_analysis_of_repository(args.folder, args.serviceaddress, extensions, languages,args.datafolder)
743
+ sc.ocr_analysis_of_repository(args.folder, args.serviceaddress, extensions, languages)
747
744
  return 0
748
745
 
749
746
 
@@ -37,7 +37,7 @@ from .ProgramRunnerBase import ProgramRunnerBase
37
37
  from .ProgramRunnerPopen import ProgramRunnerPopen
38
38
  from .SCLog import SCLog, LogLevel
39
39
 
40
- version = "4.2.71"
40
+ version = "4.2.73"
41
41
  __version__ = version
42
42
 
43
43
  class VSCodeWorkspaceShellTask:
@@ -2674,7 +2674,14 @@ TXDX
2674
2674
  self.run_program_argsasarray("pip", arguments, folder,print_live_output=self.log.loglevel==LogLevel.Debug)
2675
2675
 
2676
2676
  @GeneralUtilities.check_arguments
2677
- def ocr_analysis_of_folder(self, folder: str, serviceaddress: str, extensions: list[str], languages: list[str], datafolder: str,base_folder_for_entry: str,ignore_pattern:list[str] ) -> list[str]: # Returns a list of changed files due to ocr-analysis.
2677
+ def ocr_analysis_of_folder_using_local_docker_image(self, folder: str, extensions: list[str], languages: list[str],base_folder_for_entry: str,ignore_pattern:list[str] ) -> list[str]: # Returns a list of changed files due to ocr-analysis.
2678
+ #TODO start docker server
2679
+ serviceaddress:str=None#TODO
2680
+ self.ocr_analysis_of_folder(folder, serviceaddress, extensions, languages, base_folder_for_entry,ignore_pattern)
2681
+ #TODO stop docker server
2682
+
2683
+ @GeneralUtilities.check_arguments
2684
+ def ocr_analysis_of_folder(self, folder: str, serviceaddress: str, extensions: list[str], languages: list[str],base_folder_for_entry: str,ignore_pattern:list[str] ) -> list[str]: # Returns a list of changed files due to ocr-analysis.
2678
2685
  supported_extensions = ['png', 'jpg', 'jpeg', 'tiff', 'bmp', 'gif', 'pdf', 'docx', 'doc', 'xlsx', 'xls', 'pptx', 'ppt']
2679
2686
  changes_files: list[str] = []
2680
2687
  if base_folder_for_entry is None:
@@ -2687,13 +2694,13 @@ TXDX
2687
2694
  file_lower = file.lower()
2688
2695
  for extension in extensions:
2689
2696
  if file_lower.endswith("."+extension):
2690
- if self.ocr_analysis_of_file(file, serviceaddress, languages,datafolder,base_folder_for_entry):
2697
+ if self.ocr_analysis_of_file(file, serviceaddress, languages,base_folder_for_entry):
2691
2698
  changes_files.append(file)
2692
2699
  break
2693
2700
  for subfolder in GeneralUtilities.get_direct_folders_of_folder(folder):
2694
2701
  if GeneralUtilities.is_ignored_by_glob_pattern(os.path.dirname(subfolder),subfolder,ignore_pattern):
2695
2702
  continue
2696
- for file in self.ocr_analysis_of_folder(subfolder, serviceaddress, extensions, languages,datafolder,base_folder_for_entry+"/"+os.path.basename(subfolder), ignore_pattern):
2703
+ for file in self.ocr_analysis_of_folder(subfolder, serviceaddress, extensions, languages,base_folder_for_entry+"/"+os.path.basename(subfolder), ignore_pattern):
2697
2704
  changes_files.append(file)
2698
2705
  return changes_files
2699
2706
 
@@ -2707,8 +2714,7 @@ TXDX
2707
2714
  return False
2708
2715
 
2709
2716
  @GeneralUtilities.check_arguments
2710
- def ocr_analysis_of_file(self, file: str, serviceaddress: str, languages: list[str], datafolder: str,readable_folder_entry:str ) -> bool: # Returns true if the ocr-file was generated or updated. Returns false if the existing ocr-file was not changed.
2711
- GeneralUtilities.write_message_to_stdout(f"Starting OCR analysis of file {file}...")
2717
+ def ocr_analysis_of_file(self, file: str, serviceaddress: str, languages: list[str], readable_folder_entry:str ) -> bool: # Returns true if the ocr-file was generated or updated. Returns false if the existing ocr-file was not changed.
2712
2718
  supported_extensions = ['png', 'jpg', 'jpeg', 'tiff', 'bmp', 'webp', 'gif', 'pdf', 'rtf', 'docx', 'doc', 'odt', 'xlsx', 'xls', 'ods', 'pptx', 'ppt', 'odp']
2713
2719
  if not self.__it_supported_extension(file, supported_extensions):
2714
2720
  raise ValueError(f"File '{file}' is not supported due to unsupported extension. Supported extensions are: {', '.join(supported_extensions)}")
@@ -2722,7 +2728,8 @@ TXDX
2722
2728
  return False
2723
2729
  except:
2724
2730
  pass
2725
- ocr_content = self.get_ocr_content_of_file(file, serviceaddress, languages,datafolder)
2731
+ GeneralUtilities.write_message_to_stdout(f"Starting OCR-analysis of file \"{file}\"...")
2732
+ ocr_content = self.get_ocr_content_of_file(file, serviceaddress, languages)
2726
2733
  GeneralUtilities.ensure_file_exists(target_file)
2727
2734
  if readable_folder_entry is None:
2728
2735
  readable_folder_entry="."
@@ -2734,30 +2741,44 @@ OCR-content:
2734
2741
  return True
2735
2742
 
2736
2743
  @GeneralUtilities.check_arguments
2737
- def get_ocr_content_of_file(self, file: str, serviceaddress: str, languages: list[str], datafolder: str) -> str: # serviceaddress = None means local executable
2744
+ def get_ocr_content_of_file(self, file: str, serviceaddress: str, languages: list[str]) -> str:
2738
2745
  result: str = None
2739
- extension = Path(file).suffix
2746
+ extension = Path(file).suffix[1:]
2747
+ mime_types = {
2748
+ "pdf": "application/pdf",
2749
+ "png": "image/png",
2750
+ "jpg": "image/jpeg",
2751
+ "jpeg": "image/jpeg",
2752
+ "txt": "text/plain",
2753
+ "json": "application/json",
2754
+ }
2740
2755
  if serviceaddress is None:
2741
- arguments= ["OCRAnalysis", "--File", file, "--Languages", "+".join(languages)]
2742
- if datafolder is not None:
2743
- arguments.append("--OCRDataFolder")
2744
- arguments.append(datafolder)
2745
- program_result = self.run_program_argsasarray("simpleocrcli",arguments)
2746
- result = program_result[1]
2747
- else:
2748
- languages_for_url = '%2B'.join(languages)
2749
- package_url: str = f"https://{serviceaddress}/GetOCRContent?languages={languages_for_url}&fileType={extension}"
2750
- headers = {'Cache-Control': 'no-cache'}
2751
- r = requests.put(package_url, timeout=5, headers=headers, data=GeneralUtilities.read_binary_from_file(file))
2756
+ server_url_file:str= GeneralUtilities.normalize_path(f"{str(Path.home())}/.ScriptCollection/OCR/ServiceURL.txt")
2757
+ if os.path.isfile(server_url_file):
2758
+ for line in GeneralUtilities.read_nonempty_lines_from_file(server_url_file):
2759
+ if not line.startswith("#"):
2760
+ serviceaddress = line.strip()
2761
+ break
2762
+ GeneralUtilities.assert_not_null(serviceaddress, "ocr-service-address must not be null.")
2763
+ mime_type = mime_types.get(extension.lower(), "application/octet-stream")
2764
+ service_url: str = f"{serviceaddress}/API/v1/SimpleOCR/GetOCRContent?mimeType={mime_type}"
2765
+ for language in languages:
2766
+ service_url = service_url + f"&languages={language}"
2767
+ headers = {'Cache-Control': 'no-cache'}
2768
+ with open(file, "rb") as f:
2769
+ files_to_analyse = {
2770
+ "fileContent": (os.path.basename(file), f, mime_type)
2771
+ }
2772
+ r = requests.put(service_url, timeout=600, headers=headers, files=files_to_analyse)
2752
2773
  if r.status_code != 200:
2753
2774
  raise ValueError(f"Checking for latest tor package resulted in HTTP-response-code {r.status_code}.")
2754
2775
  result = GeneralUtilities.bytes_to_string(r.content)
2755
2776
  return result
2756
2777
 
2757
2778
  @GeneralUtilities.check_arguments
2758
- def ocr_analysis_of_repository(self, folder: str, serviceaddress: str, extensions: list[str], languages: list[str], datafolder: str) -> None:
2779
+ def ocr_analysis_of_repository(self, folder: str, serviceaddress: str, extensions: list[str], languages: list[str]) -> None:
2759
2780
  self.assert_is_git_repository(folder)
2760
- self.ocr_analysis_of_folder(folder, serviceaddress, extensions, languages, datafolder,".",[".git"])
2781
+ self.ocr_analysis_of_folder(folder, serviceaddress, extensions, languages,".",[".git"])
2761
2782
 
2762
2783
  @GeneralUtilities.check_arguments
2763
2784
  def update_timestamp_in_file(self, target_file: str) -> None:
@@ -3299,7 +3320,7 @@ OCR-content:
3299
3320
  if os.path.isabs(target_file):
3300
3321
  target_file=GeneralUtilities.resolve_relative_path(target_file,repository_folder)
3301
3322
  target_file=GeneralUtilities.normalize_path(target_file)
3302
- files=self.get_all_files_in_git_repository(repository_folder,ignore_ignored_files,include_submodules)
3323
+ files=[path.replace("\\","/") for path in self.get_all_files_in_git_repository(repository_folder,ignore_ignored_files,include_submodules)]
3303
3324
  GeneralUtilities.ensure_file_exists(target_file)
3304
3325
  GeneralUtilities.write_lines_to_file(target_file, files)
3305
3326
 
@@ -3322,7 +3343,7 @@ OCR-content:
3322
3343
  GeneralUtilities.ensure_file_exists(target_file)
3323
3344
  GeneralUtilities.write_lines_to_file(target_file, commits)
3324
3345
 
3325
-
3326
3346
  @GeneralUtilities.check_arguments
3327
3347
  def is_runnning_in_container(self) ->bool:
3348
+ """this function is based on a convention and does not do a real check."""
3328
3349
  return os.environ.get("ISRUNNINGINCONTAINER") == "true"
@@ -34,6 +34,7 @@ class TFCPS_Generic_CLI:
34
34
  def parse(file:str)->TFCPS_Generic_Functions:
35
35
  parser = argparse.ArgumentParser()
36
36
  verbosity_values = ", ".join(f"{lvl.value}={lvl.name}" for lvl in LogLevel)
37
+ parser.add_argument('-r', '--repository', required=False, default=None)
37
38
  parser.add_argument('-e', '--targetenvironmenttype', required=False, default="QualityCheck")
38
39
  parser.add_argument('-a', '--additionalargumentsfile', required=False, default=None)
39
40
  parser.add_argument('-v', '--verbosity', required=False, default=3, help=f"Sets the loglevel. Possible values: {verbosity_values}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ScriptCollection
3
- Version: 4.2.71
3
+ Version: 4.2.73
4
4
  Summary: The ScriptCollection is the place for reusable scripts.
5
5
  Home-page: https://github.com/anionDev/ScriptCollection
6
6
  Author: Marius Göcke
@@ -22,7 +22,7 @@ Classifier: Topic :: Terminals
22
22
  Classifier: Topic :: Utilities
23
23
  Requires-Python: >=3.10
24
24
  Description-Content-Type: text/markdown
25
- Requires-Dist: build>=1.4.2
25
+ Requires-Dist: build>=1.4.3
26
26
  Requires-Dist: coverage>=7.13.5
27
27
  Requires-Dist: cyclonedx-bom>=7.1.0
28
28
  Requires-Dist: defusedxml>=0.7.1
@@ -36,7 +36,7 @@ Requires-Dist: pycdlib>=1.14.0
36
36
  Requires-Dist: Pygments>=2.20.0
37
37
  Requires-Dist: pylint>=4.0.5
38
38
  Requires-Dist: pyOpenSSL>=25.3.0
39
- Requires-Dist: PyPDF>=6.9.2
39
+ Requires-Dist: PyPDF>=6.10.2
40
40
  Requires-Dist: pytest>=8.4.2
41
41
  Requires-Dist: PyYAML>=6.0.3
42
42
  Requires-Dist: qrcode>=8.2.0
@@ -1,6 +1,6 @@
1
1
  ScriptCollection/AnionBuildPlatform.py,sha256=K-PHarX802A0PU8uRu0GNcEZiXujFoXHACe-X9YJsAQ,11711
2
2
  ScriptCollection/CertificateUpdater.py,sha256=Pa6eyjQSx7IIvj4PQVMI0IwMs01KQrNSB7Qa-7lRfBs,9375
3
- ScriptCollection/Executables.py,sha256=qpo0g5peWdlK5uLIUCyLDB9c3JCk0ETbtmOJXZwuHh4,44510
3
+ ScriptCollection/Executables.py,sha256=SsA3zeDL8QEsh7GxjDjatv5P4eFeDBPqB0F8pNXoYzA,44234
4
4
  ScriptCollection/GeneralUtilities.py,sha256=3Fgp0fAXF-rfcohy6k1RsRcMXEVRF15fHl8QJnViKIg,65497
5
5
  ScriptCollection/HTTPMaintenanceOverheadHelper.py,sha256=TToNtyO1XzsMbBsTBf3o0xgOK0v4Jf03qw2Z0xb2nCk,2007
6
6
  ScriptCollection/ProcessesRunner.py,sha256=o5raxIt3lknNPoPrjNzJ2bprRPJ3SnL0rrR7crraD7E,1523
@@ -9,7 +9,7 @@ ScriptCollection/ProgramRunnerMock.py,sha256=uTu-aFle1W_oKjeQEmuPsFPQpvo0kRf2FrR
9
9
  ScriptCollection/ProgramRunnerPopen.py,sha256=BPY7-ZMIlqT7JOKz8qlB5c0laF2Js-ijzqk09GxZC48,3821
10
10
  ScriptCollection/ProgramRunnerSudo.py,sha256=_khC3xuTdrPoLluBJZWfldltmmuKltABJPcbjZSFW-4,4835
11
11
  ScriptCollection/SCLog.py,sha256=8TRy1LeYMsPOIuWUcnUNNbO5pd-cNBS-3cn-kdzP8FU,4768
12
- ScriptCollection/ScriptCollectionCore.py,sha256=CHXFzsT2f1gfgLe_TDiIICDKQi-AUD5LUjVH9Lc6A5k,180421
12
+ ScriptCollection/ScriptCollectionCore.py,sha256=kqXL6UCd9VVpF7YQHYNjOediKZX0PIX2AMKEAXU09do,181498
13
13
  ScriptCollection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  ScriptCollection/OCIImages/AbstractImageHandler.py,sha256=83qDMILwxhH9DbC0sb358Vu8PXEysmJJyap_6gECZqs,1627
15
15
  ScriptCollection/OCIImages/OCIImageManager.py,sha256=aBogkSXNDyi8NO11N-s03nuFJEv7PyJ-wjHuYYeZfvs,6662
@@ -26,7 +26,7 @@ ScriptCollection/TFCPS/TFCPS_CodeUnitSpecific_Base.py,sha256=EMNDgkUem87aH1csz7e
26
26
  ScriptCollection/TFCPS/TFCPS_CodeUnit_BuildCodeUnit.py,sha256=MieeAAKm1y58qXp1cCUN5mj5vwpq4Pc1uusUjPQzpvc,8237
27
27
  ScriptCollection/TFCPS/TFCPS_CodeUnit_BuildCodeUnits.py,sha256=3Mo8XrTDD1Z7_K1PLxSPYA-fePQ95sIAeNKGj0matiw,15109
28
28
  ScriptCollection/TFCPS/TFCPS_CreateRelease.py,sha256=yqGstRjRfVVGbqrcBgtYStqth2x2SvBb3y2Ht8GsuMQ,6554
29
- ScriptCollection/TFCPS/TFCPS_Generic.py,sha256=O-0guM_LJCcZmPZJhMgTvXD2RXUJEBWWv6Bt6hDFhvM,1943
29
+ ScriptCollection/TFCPS/TFCPS_Generic.py,sha256=Tpzgiz6m3-cYCkObZOG5Uu7oM-EMoWFzzRpLl3Lblqo,2023
30
30
  ScriptCollection/TFCPS/TFCPS_MergeToMain.py,sha256=-Ev9D3bZDlUk2WFQhcmvzQ3FCS97OdsVUd0koAdmpZc,7474
31
31
  ScriptCollection/TFCPS/TFCPS_MergeToStable.py,sha256=Ajfy2pLajTuU6UpwItHt4C2a-gLF3gPc4z6BktL3Cio,22163
32
32
  ScriptCollection/TFCPS/TFCPS_PreBuildCodeunitsScript.py,sha256=f0Uq1cA_4LvmL72cal0crrbKF6PcxL13D9wBKuQ1YBw,2328
@@ -47,8 +47,8 @@ ScriptCollection/TFCPS/NodeJS/TFCPS_CodeUnitSpecific_NodeJS.py,sha256=GQLE6FeR-X
47
47
  ScriptCollection/TFCPS/NodeJS/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
48
  ScriptCollection/TFCPS/Python/TFCPS_CodeUnitSpecific_Python.py,sha256=9XK7XnbeOnq_4siVoWovogStoKFiZLhGh3C_f2YaznI,13621
49
49
  ScriptCollection/TFCPS/Python/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
- scriptcollection-4.2.71.dist-info/METADATA,sha256=AqeIZ0bihEiuUCtCAc4go-OsxfnC8OXweAbFR1sbpQc,7690
51
- scriptcollection-4.2.71.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
52
- scriptcollection-4.2.71.dist-info/entry_points.txt,sha256=27XwAJEcaMEc1be0Ec1vKHCbiU4Ziu8jKL-SqsrYOIQ,4680
53
- scriptcollection-4.2.71.dist-info/top_level.txt,sha256=hY2hOVH0V0Ce51WB76zKkIWTUNwMUdHo4XDkR2vYVwg,17
54
- scriptcollection-4.2.71.dist-info/RECORD,,
50
+ scriptcollection-4.2.73.dist-info/METADATA,sha256=P5a45qv1brID3Ot2mRgr2_qmrygBCaZhxPKdS_nCKx0,7691
51
+ scriptcollection-4.2.73.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
52
+ scriptcollection-4.2.73.dist-info/entry_points.txt,sha256=27XwAJEcaMEc1be0Ec1vKHCbiU4Ziu8jKL-SqsrYOIQ,4680
53
+ scriptcollection-4.2.73.dist-info/top_level.txt,sha256=hY2hOVH0V0Ce51WB76zKkIWTUNwMUdHo4XDkR2vYVwg,17
54
+ scriptcollection-4.2.73.dist-info/RECORD,,