ScriptCollection 4.2.72__py3-none-any.whl → 4.2.74__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -706,14 +706,13 @@ def OCRAnalysisOfFolder() -> int:
706
706
  parser.add_argument('-e', '--extensions', required=False, default="pdf,docx,jpg,png,xlsx")
707
707
  parser.add_argument('-l', '--languages', required=False, default="eng")
708
708
  parser.add_argument('-f', '--folder', required=False, default=None)
709
- parser.add_argument('-d', '--datafolder', required=False, default=None)
710
709
  args = parser.parse_args()
711
710
  sc = ScriptCollectionCore()
712
711
  if args.folder is None:
713
712
  args.folder = os.getcwd()
714
713
  languages=args.languages.split(",")
715
714
  extensions=args.extensions.split(",")
716
- sc.ocr_analysis_of_folder(args.folder, args.serviceaddress, extensions, languages,args.datafolder,args.folder,[])
715
+ sc.ocr_analysis_of_folder(args.folder, args.serviceaddress, extensions, languages,args.folder,[])
717
716
  return 0
718
717
 
719
718
 
@@ -722,11 +721,10 @@ def OCRAnalysisOfFile() -> int:
722
721
  parser.add_argument('-s', '--serviceaddress', required=False, default=None)
723
722
  parser.add_argument('-l', '--languages', required=False, default="eng")
724
723
  parser.add_argument('-f', '--file', required=True)
725
- parser.add_argument('-d', '--datafolder', required=False, default=None)
726
724
  args = parser.parse_args()
727
725
  sc = ScriptCollectionCore()
728
726
  languages=args.languages.split(",")
729
- sc.ocr_analysis_of_file(args.file, args.serviceaddress, languages,args.datafolder,".")
727
+ sc.ocr_analysis_of_file(args.file, args.serviceaddress, languages,".")
730
728
  return 0
731
729
 
732
730
 
@@ -736,14 +734,13 @@ def OCRAnalysisOfRepository() -> int:
736
734
  parser.add_argument('-e', '--extensions', required=False, default="pdf,docx,jpg,png,xlsx")
737
735
  parser.add_argument('-l', '--languages', required=False, default="eng")
738
736
  parser.add_argument('-f', '--folder', required=False, default=None)
739
- parser.add_argument('-d', '--datafolder', required=False, default=None)
740
737
  args = parser.parse_args()
741
738
  sc = ScriptCollectionCore()
742
739
  if args.folder is None:
743
740
  args.folder = os.getcwd()
744
741
  languages=args.languages.split(",")
745
742
  extensions=args.extensions.split(",")
746
- sc.ocr_analysis_of_repository(args.folder, args.serviceaddress, extensions, languages,args.datafolder)
743
+ sc.ocr_analysis_of_repository(args.folder, args.serviceaddress, extensions, languages)
747
744
  return 0
748
745
 
749
746
 
@@ -37,7 +37,7 @@ from .ProgramRunnerBase import ProgramRunnerBase
37
37
  from .ProgramRunnerPopen import ProgramRunnerPopen
38
38
  from .SCLog import SCLog, LogLevel
39
39
 
40
- version = "4.2.72"
40
+ version = "4.2.74"
41
41
  __version__ = version
42
42
 
43
43
  class VSCodeWorkspaceShellTask:
@@ -2674,7 +2674,14 @@ TXDX
2674
2674
  self.run_program_argsasarray("pip", arguments, folder,print_live_output=self.log.loglevel==LogLevel.Debug)
2675
2675
 
2676
2676
  @GeneralUtilities.check_arguments
2677
- def ocr_analysis_of_folder(self, folder: str, serviceaddress: str, extensions: list[str], languages: list[str], datafolder: str,base_folder_for_entry: str,ignore_pattern:list[str] ) -> list[str]: # Returns a list of changed files due to ocr-analysis.
2677
+ def ocr_analysis_of_folder_using_local_docker_image(self, folder: str, extensions: list[str], languages: list[str],base_folder_for_entry: str,ignore_pattern:list[str] ) -> list[str]: # Returns a list of changed files due to ocr-analysis.
2678
+ #TODO start docker server
2679
+ serviceaddress:str=None#TODO
2680
+ self.ocr_analysis_of_folder(folder, serviceaddress, extensions, languages, base_folder_for_entry,ignore_pattern)
2681
+ #TODO stop docker server
2682
+
2683
+ @GeneralUtilities.check_arguments
2684
+ def ocr_analysis_of_folder(self, folder: str, serviceaddress: str, extensions: list[str], languages: list[str],base_folder_for_entry: str,ignore_pattern:list[str] ) -> list[str]: # Returns a list of changed files due to ocr-analysis.
2678
2685
  supported_extensions = ['png', 'jpg', 'jpeg', 'tiff', 'bmp', 'gif', 'pdf', 'docx', 'doc', 'xlsx', 'xls', 'pptx', 'ppt']
2679
2686
  changes_files: list[str] = []
2680
2687
  if base_folder_for_entry is None:
@@ -2687,13 +2694,13 @@ TXDX
2687
2694
  file_lower = file.lower()
2688
2695
  for extension in extensions:
2689
2696
  if file_lower.endswith("."+extension):
2690
- if self.ocr_analysis_of_file(file, serviceaddress, languages,datafolder,base_folder_for_entry):
2697
+ if self.ocr_analysis_of_file(file, serviceaddress, languages,base_folder_for_entry):
2691
2698
  changes_files.append(file)
2692
2699
  break
2693
2700
  for subfolder in GeneralUtilities.get_direct_folders_of_folder(folder):
2694
2701
  if GeneralUtilities.is_ignored_by_glob_pattern(os.path.dirname(subfolder),subfolder,ignore_pattern):
2695
2702
  continue
2696
- for file in self.ocr_analysis_of_folder(subfolder, serviceaddress, extensions, languages,datafolder,base_folder_for_entry+"/"+os.path.basename(subfolder), ignore_pattern):
2703
+ for file in self.ocr_analysis_of_folder(subfolder, serviceaddress, extensions, languages,base_folder_for_entry+"/"+os.path.basename(subfolder), ignore_pattern):
2697
2704
  changes_files.append(file)
2698
2705
  return changes_files
2699
2706
 
@@ -2707,8 +2714,7 @@ TXDX
2707
2714
  return False
2708
2715
 
2709
2716
  @GeneralUtilities.check_arguments
2710
- def ocr_analysis_of_file(self, file: str, serviceaddress: str, languages: list[str], datafolder: str,readable_folder_entry:str ) -> bool: # Returns true if the ocr-file was generated or updated. Returns false if the existing ocr-file was not changed.
2711
- GeneralUtilities.write_message_to_stdout(f"Starting OCR analysis of file {file}...")
2717
+ def ocr_analysis_of_file(self, file: str, serviceaddress: str, languages: list[str], readable_folder_entry:str ) -> bool: # Returns true if the ocr-file was generated or updated. Returns false if the existing ocr-file was not changed.
2712
2718
  supported_extensions = ['png', 'jpg', 'jpeg', 'tiff', 'bmp', 'webp', 'gif', 'pdf', 'rtf', 'docx', 'doc', 'odt', 'xlsx', 'xls', 'ods', 'pptx', 'ppt', 'odp']
2713
2719
  if not self.__it_supported_extension(file, supported_extensions):
2714
2720
  raise ValueError(f"File '{file}' is not supported due to unsupported extension. Supported extensions are: {', '.join(supported_extensions)}")
@@ -2722,7 +2728,8 @@ TXDX
2722
2728
  return False
2723
2729
  except:
2724
2730
  pass
2725
- ocr_content = self.get_ocr_content_of_file(file, serviceaddress, languages,datafolder)
2731
+ GeneralUtilities.write_message_to_stdout(f"Starting OCR-analysis of file \"{file}\"...")
2732
+ ocr_content = self.get_ocr_content_of_file(file, serviceaddress, languages)
2726
2733
  GeneralUtilities.ensure_file_exists(target_file)
2727
2734
  if readable_folder_entry is None:
2728
2735
  readable_folder_entry="."
@@ -2734,30 +2741,52 @@ OCR-content:
2734
2741
  return True
2735
2742
 
2736
2743
  @GeneralUtilities.check_arguments
2737
- def get_ocr_content_of_file(self, file: str, serviceaddress: str, languages: list[str], datafolder: str) -> str: # serviceaddress = None means local executable
2744
+ def get_ocr_content_of_file(self, file: str, serviceaddress: str, languages: list[str]) -> str:
2738
2745
  result: str = None
2739
- extension = Path(file).suffix
2746
+ extension = Path(file).suffix[1:]
2747
+ mime_types = {
2748
+ "pdf": "application/pdf",
2749
+ "png": "image/png",
2750
+ "jpg": "image/jpeg",
2751
+ "jpeg": "image/jpeg",
2752
+ "txt": "text/plain",
2753
+ "json": "application/json",
2754
+ "doc": "application/msword",
2755
+ "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
2756
+ "xls": "application/vnd.ms-excel",
2757
+ "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
2758
+ }
2740
2759
  if serviceaddress is None:
2741
- arguments= ["OCRAnalysis", "--File", file, "--Languages", "+".join(languages)]
2742
- if datafolder is not None:
2743
- arguments.append("--OCRDataFolder")
2744
- arguments.append(datafolder)
2745
- program_result = self.run_program_argsasarray("simpleocrcli",arguments)
2746
- result = program_result[1]
2747
- else:
2748
- languages_for_url = '%2B'.join(languages)
2749
- package_url: str = f"https://{serviceaddress}/GetOCRContent?languages={languages_for_url}&fileType={extension}"
2750
- headers = {'Cache-Control': 'no-cache'}
2751
- r = requests.put(package_url, timeout=5, headers=headers, data=GeneralUtilities.read_binary_from_file(file))
2760
+ server_url_file:str= GeneralUtilities.normalize_path(f"{str(Path.home())}/.ScriptCollection/OCR/ServiceURL.txt")
2761
+ if os.path.isfile(server_url_file):
2762
+ for line in GeneralUtilities.read_nonempty_lines_from_file(server_url_file):
2763
+ if not line.startswith("#"):
2764
+ serviceaddress = line.strip()
2765
+ break
2766
+ GeneralUtilities.assert_not_null(serviceaddress, "ocr-service-address must not be null.")
2767
+ mime_type = mime_types.get(extension.lower(), "application/octet-stream")
2768
+ service_url: str = f"{serviceaddress}/API/v1/SimpleOCR/GetOCRContent?mimeType={mime_type}"
2769
+ for language in languages:
2770
+ service_url = service_url + f"&languages={language}"
2771
+ headers = {'Cache-Control': 'no-cache'}
2772
+ with open(file, "rb") as f:
2773
+ files_to_analyse = {
2774
+ "fileContent": (os.path.basename(file), f, mime_type)
2775
+ }
2776
+ r = requests.put(service_url, timeout=3600, headers=headers, files=files_to_analyse,verify=True)
2752
2777
  if r.status_code != 200:
2753
- raise ValueError(f"Checking for latest tor package resulted in HTTP-response-code {r.status_code}.")
2778
+ if r.status_code == 400:
2779
+ return f"Could not calculate ocr-content for file \"{file}\". File may be broken."
2780
+ else:
2781
+ raise ValueError(f"Retrieving ocr-content for file \"{file}\" resulted in HTTP-response-code {r.status_code}.")
2782
+
2754
2783
  result = GeneralUtilities.bytes_to_string(r.content)
2755
2784
  return result
2756
2785
 
2757
2786
  @GeneralUtilities.check_arguments
2758
- def ocr_analysis_of_repository(self, folder: str, serviceaddress: str, extensions: list[str], languages: list[str], datafolder: str) -> None:
2787
+ def ocr_analysis_of_repository(self, folder: str, serviceaddress: str, extensions: list[str], languages: list[str]) -> None:
2759
2788
  self.assert_is_git_repository(folder)
2760
- self.ocr_analysis_of_folder(folder, serviceaddress, extensions, languages, datafolder,".",[".git"])
2789
+ self.ocr_analysis_of_folder(folder, serviceaddress, extensions, languages,".",[".git"])
2761
2790
 
2762
2791
  @GeneralUtilities.check_arguments
2763
2792
  def update_timestamp_in_file(self, target_file: str) -> None:
@@ -3299,7 +3328,7 @@ OCR-content:
3299
3328
  if os.path.isabs(target_file):
3300
3329
  target_file=GeneralUtilities.resolve_relative_path(target_file,repository_folder)
3301
3330
  target_file=GeneralUtilities.normalize_path(target_file)
3302
- files=self.get_all_files_in_git_repository(repository_folder,ignore_ignored_files,include_submodules)
3331
+ files=[path.replace("\\","/") for path in self.get_all_files_in_git_repository(repository_folder,ignore_ignored_files,include_submodules)]
3303
3332
  GeneralUtilities.ensure_file_exists(target_file)
3304
3333
  GeneralUtilities.write_lines_to_file(target_file, files)
3305
3334
 
@@ -3322,7 +3351,7 @@ OCR-content:
3322
3351
  GeneralUtilities.ensure_file_exists(target_file)
3323
3352
  GeneralUtilities.write_lines_to_file(target_file, commits)
3324
3353
 
3325
-
3326
3354
  @GeneralUtilities.check_arguments
3327
3355
  def is_runnning_in_container(self) ->bool:
3356
+ """this function is based on a convention and does not do a real check."""
3328
3357
  return os.environ.get("ISRUNNINGINCONTAINER") == "true"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ScriptCollection
3
- Version: 4.2.72
3
+ Version: 4.2.74
4
4
  Summary: The ScriptCollection is the place for reusable scripts.
5
5
  Home-page: https://github.com/anionDev/ScriptCollection
6
6
  Author: Marius Göcke
@@ -1,6 +1,6 @@
1
1
  ScriptCollection/AnionBuildPlatform.py,sha256=K-PHarX802A0PU8uRu0GNcEZiXujFoXHACe-X9YJsAQ,11711
2
2
  ScriptCollection/CertificateUpdater.py,sha256=Pa6eyjQSx7IIvj4PQVMI0IwMs01KQrNSB7Qa-7lRfBs,9375
3
- ScriptCollection/Executables.py,sha256=qpo0g5peWdlK5uLIUCyLDB9c3JCk0ETbtmOJXZwuHh4,44510
3
+ ScriptCollection/Executables.py,sha256=SsA3zeDL8QEsh7GxjDjatv5P4eFeDBPqB0F8pNXoYzA,44234
4
4
  ScriptCollection/GeneralUtilities.py,sha256=3Fgp0fAXF-rfcohy6k1RsRcMXEVRF15fHl8QJnViKIg,65497
5
5
  ScriptCollection/HTTPMaintenanceOverheadHelper.py,sha256=TToNtyO1XzsMbBsTBf3o0xgOK0v4Jf03qw2Z0xb2nCk,2007
6
6
  ScriptCollection/ProcessesRunner.py,sha256=o5raxIt3lknNPoPrjNzJ2bprRPJ3SnL0rrR7crraD7E,1523
@@ -9,7 +9,7 @@ ScriptCollection/ProgramRunnerMock.py,sha256=uTu-aFle1W_oKjeQEmuPsFPQpvo0kRf2FrR
9
9
  ScriptCollection/ProgramRunnerPopen.py,sha256=BPY7-ZMIlqT7JOKz8qlB5c0laF2Js-ijzqk09GxZC48,3821
10
10
  ScriptCollection/ProgramRunnerSudo.py,sha256=_khC3xuTdrPoLluBJZWfldltmmuKltABJPcbjZSFW-4,4835
11
11
  ScriptCollection/SCLog.py,sha256=8TRy1LeYMsPOIuWUcnUNNbO5pd-cNBS-3cn-kdzP8FU,4768
12
- ScriptCollection/ScriptCollectionCore.py,sha256=qU8XbRln8Ozaqe_Z4XnU7--XQfmlB0z4RoSupVB01_s,180421
12
+ ScriptCollection/ScriptCollectionCore.py,sha256=5RXTdMUUCb1XvzPEj8JTqQosPQ4YgRufsbXpY6ZEHa8,181964
13
13
  ScriptCollection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  ScriptCollection/OCIImages/AbstractImageHandler.py,sha256=83qDMILwxhH9DbC0sb358Vu8PXEysmJJyap_6gECZqs,1627
15
15
  ScriptCollection/OCIImages/OCIImageManager.py,sha256=aBogkSXNDyi8NO11N-s03nuFJEv7PyJ-wjHuYYeZfvs,6662
@@ -47,8 +47,8 @@ ScriptCollection/TFCPS/NodeJS/TFCPS_CodeUnitSpecific_NodeJS.py,sha256=GQLE6FeR-X
47
47
  ScriptCollection/TFCPS/NodeJS/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
48
  ScriptCollection/TFCPS/Python/TFCPS_CodeUnitSpecific_Python.py,sha256=9XK7XnbeOnq_4siVoWovogStoKFiZLhGh3C_f2YaznI,13621
49
49
  ScriptCollection/TFCPS/Python/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
- scriptcollection-4.2.72.dist-info/METADATA,sha256=eVq5U2dxHmaGEZnjzCZg-9DYI1S6s5sHx3Zbofx7Xfk,7691
51
- scriptcollection-4.2.72.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
52
- scriptcollection-4.2.72.dist-info/entry_points.txt,sha256=27XwAJEcaMEc1be0Ec1vKHCbiU4Ziu8jKL-SqsrYOIQ,4680
53
- scriptcollection-4.2.72.dist-info/top_level.txt,sha256=hY2hOVH0V0Ce51WB76zKkIWTUNwMUdHo4XDkR2vYVwg,17
54
- scriptcollection-4.2.72.dist-info/RECORD,,
50
+ scriptcollection-4.2.74.dist-info/METADATA,sha256=X82BuSGCRx0eK2pnFKs2wEhh7_xViAovRmeiZpqitfc,7691
51
+ scriptcollection-4.2.74.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
52
+ scriptcollection-4.2.74.dist-info/entry_points.txt,sha256=27XwAJEcaMEc1be0Ec1vKHCbiU4Ziu8jKL-SqsrYOIQ,4680
53
+ scriptcollection-4.2.74.dist-info/top_level.txt,sha256=hY2hOVH0V0Ce51WB76zKkIWTUNwMUdHo4XDkR2vYVwg,17
54
+ scriptcollection-4.2.74.dist-info/RECORD,,