ocrd 3.3.2__py3-none-any.whl → 3.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ocrd/cli/__init__.py CHANGED
@@ -67,6 +67,12 @@ Variables:
67
67
  \b
68
68
  {config.describe('OCRD_EXISTING_OUTPUT', wrap_text=False)}
69
69
  \b
70
+ {config.describe('OCRD_MAX_MISSING_OUTPUTS')}
71
+ \b
72
+ {config.describe('OCRD_MAX_PARALLEL_PAGES')}
73
+ \b
74
+ {config.describe('OCRD_PROCESSING_PAGE_TIMEOUT')}
75
+ \b
70
76
  {config.describe('OCRD_METS_CACHING')}
71
77
  \b
72
78
  {config.describe('OCRD_MAX_PROCESSOR_CACHE')}
ocrd/cli/ocrd_tool.py CHANGED
@@ -18,7 +18,8 @@ from ocrd.processor import Processor
18
18
  from ocrd_utils import (
19
19
  set_json_key_value_overrides,
20
20
  parse_json_string_or_file,
21
- parse_json_string_with_comments as loads
21
+ parse_json_string_with_comments as loads,
22
+ get_moduledir
22
23
  )
23
24
  from ocrd_validators import ParameterValidator, OcrdToolValidator
24
25
 
@@ -104,6 +105,13 @@ def ocrd_tool_list(ctx):
104
105
  def ocrd_tool_dump(ctx):
105
106
  print(dumps(ctx.json['tools'], indent=True))
106
107
 
108
+ @ocrd_tool_cli.command('dump-module-dirs', help="Dump module directory of each tool")
109
+ @pass_ocrd_tool
110
+ def ocrd_tool_dump_module_dirs(ctx):
111
+ print(dumps({tool_name: get_moduledir(tool_name)
112
+ for tool_name in ctx.json['tools']},
113
+ indent=True))
114
+
107
115
  # ----------------------------------------------------------------------
108
116
  # ocrd ocrd-tool tool
109
117
  # ----------------------------------------------------------------------
ocrd/cli/workspace.py CHANGED
@@ -474,11 +474,11 @@ def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field, incl
474
474
  if wait:
475
475
  time.sleep(wait)
476
476
  if undo_download and f.url and f.local_filename:
477
- f.local_filename = None
478
477
  modified_mets = True
479
478
  if not keep_files:
480
479
  ctx.log.debug("rm %s [cwd=%s]", f.local_filename, workspace.directory)
481
480
  unlink(f.local_filename)
481
+ f.local_filename = None
482
482
  ret_entry = [f.ID if field == 'pageId' else str(getattr(f, field)) or '' for field in output_field]
483
483
  ret.append(ret_entry)
484
484
  if modified_mets:
@@ -56,7 +56,7 @@ def ocrd_cli_options(f):
56
56
  # subcommands. So we have to work around that by creating a
57
57
  # pseudo-subcommand handled in ocrd_cli_wrap_processor
58
58
  argument('subcommand', nargs=1, required=False,
59
- type=click.Choice([AgentType.PROCESSING_WORKER, AgentType.PROCESSOR_SERVER])),
59
+ type=click.Choice(list(map(str, AgentType)))),
60
60
  ]
61
61
  for param in params:
62
62
  param(f)
ocrd/mets_server.py CHANGED
@@ -46,7 +46,7 @@ class OcrdFileModel(BaseModel):
46
46
  ):
47
47
  return OcrdFileModel(
48
48
  file_grp=file_grp, file_id=file_id, page_id=page_id, mimetype=mimetype, url=url,
49
- local_filename=str(local_filename)
49
+ local_filename=str(local_filename) if local_filename else None
50
50
  )
51
51
 
52
52
 
@@ -314,7 +314,7 @@ class ClientSideOcrdMets:
314
314
  else:
315
315
  r = self.session.request("POST", self.url, json=MpxReq.add_file(self.ws_dir_path, kwargs))
316
316
  if not r.ok:
317
- raise RuntimeError(f"Failed to add file ({str(data)}): {r.json()[errors]}")
317
+ raise RuntimeError(f"Failed to add file ({str(data)}): {r.json()}")
318
318
 
319
319
  return ClientSideOcrdFile(
320
320
  None, fileGrp=file_grp,
ocrd/processor/base.py CHANGED
@@ -29,8 +29,7 @@ from frozendict import frozendict
29
29
  # this is where the fixes came from:
30
30
  from loky import Future, ProcessPoolExecutor
31
31
  import multiprocessing as mp
32
- from threading import Timer
33
- from _thread import interrupt_main
32
+ from multiprocessing.pool import ThreadPool
34
33
 
35
34
  from click import wrap_text
36
35
  from deprecated import deprecated
@@ -783,11 +782,16 @@ class Processor():
783
782
  page_id = input_files[input_pos].pageId
784
783
  self._base_logger.info("processing page %s", page_id)
785
784
  for i, input_file in enumerate(input_files):
785
+ grp = self.input_file_grp.split(',')[i]
786
786
  if input_file is None:
787
- grp = self.input_file_grp.split(',')[i]
788
787
  self._base_logger.debug(f"ignoring missing file for input fileGrp {grp} for page {page_id}")
789
788
  continue
790
789
  assert isinstance(input_file, get_args(OcrdFileType))
790
+ if not input_file.local_filename:
791
+ self._base_logger.error(f'No local file exists for page {page_id} in file group {grp}')
792
+ if config.OCRD_MISSING_INPUT == 'ABORT':
793
+ raise MissingInputFile(grp, page_id, input_file.mimetype)
794
+ continue
791
795
  self._base_logger.debug(f"parsing file {input_file.ID} for page {page_id}")
792
796
  try:
793
797
  page_ = page_from_file(input_file)
@@ -796,6 +800,9 @@ class Processor():
796
800
  except ValueError as err:
797
801
  # not PAGE and not an image to generate PAGE for
798
802
  self._base_logger.error(f"non-PAGE input for page {page_id}: {err}")
803
+ if not any(input_pcgts):
804
+ self._base_logger.warning(f'skipping page {page_id}')
805
+ return
799
806
  output_file_id = make_file_id(input_files[input_pos], self.output_file_grp)
800
807
  if input_files[input_pos].fileGrp == self.output_file_grp:
801
808
  # input=output fileGrp: re-use ID exactly
@@ -1107,7 +1114,11 @@ class Processor():
1107
1114
  self._base_logger.critical(f"Could not find any files for selected pageId {self.page_id}.\n"
1108
1115
  f"compare '{self.page_id}' with the output of 'orcd workspace list-page'.")
1109
1116
  ifts = []
1110
- for page, ifiles in pages.items():
1117
+ # use physical page order
1118
+ for page in self.workspace.mets.physical_pages:
1119
+ if page not in pages:
1120
+ continue
1121
+ ifiles = pages[page]
1111
1122
  for i, ifg in enumerate(ifgs):
1112
1123
  if not ifiles[i]:
1113
1124
  # could be from non-unique with on_error=skip or from true gap
@@ -1150,18 +1161,15 @@ def _page_worker(timeout, *input_files):
1150
1161
  """
1151
1162
  page_id = next((file.pageId for file in input_files
1152
1163
  if hasattr(file, 'pageId')), "")
1153
- if timeout > 0:
1154
- timer = Timer(timeout, interrupt_main)
1155
- timer.start()
1164
+ pool = ThreadPool(processes=1)
1156
1165
  try:
1157
- _page_worker_processor.process_page_file(*input_files)
1166
+ #_page_worker_processor.process_page_file(*input_files)
1167
+ async_result = pool.apply_async(_page_worker_processor.process_page_file, input_files)
1168
+ async_result.get(timeout or None)
1158
1169
  _page_worker_processor.logger.debug("page worker completed for page %s", page_id)
1159
- except KeyboardInterrupt:
1170
+ except mp.TimeoutError:
1160
1171
  _page_worker_processor.logger.debug("page worker timed out for page %s", page_id)
1161
- raise TimeoutError()
1162
- finally:
1163
- if timeout > 0:
1164
- timer.cancel()
1172
+ raise
1165
1173
 
1166
1174
  def generate_processor_help(ocrd_tool, processor_instance=None, subcommand=None):
1167
1175
  """Generate a string describing the full CLI of this processor including params.
ocrd/workspace.py CHANGED
@@ -5,7 +5,7 @@ from shutil import copyfileobj
5
5
  from re import sub
6
6
  from tempfile import NamedTemporaryFile
7
7
  from contextlib import contextmanager
8
- from typing import Optional, Union
8
+ from typing import Optional, Union, Callable
9
9
 
10
10
  from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, cvtColor
11
11
  from PIL import Image
@@ -457,6 +457,20 @@ class Workspace():
457
457
  with atomic_write(self.mets_target) as f:
458
458
  f.write(self.mets.to_xml(xmllint=True).decode('utf-8'))
459
459
 
460
+ def _apply_mets_file(self, filename_or_url: str, fun: Callable):
461
+ if not filename_or_url:
462
+ # avoid "finding" just any file
463
+ raise ValueError("requires non-empty filename or URL")
464
+ with pushd_popd(self.directory):
465
+ if Path(filename_or_url).exists():
466
+ return fun(filename_or_url)
467
+ if image_file := next(self.mets.find_files(local_filename=str(filename_or_url)), None):
468
+ return fun(image_file.local_filename)
469
+ if image_file := next(self.mets.find_files(url=str(filename_or_url)), None):
470
+ return fun(self.download_file(image_file).local_filename)
471
+ with download_temporary_file(filename_or_url) as f:
472
+ return fun(f.name)
473
+
460
474
  def resolve_image_exif(self, image_url):
461
475
  """
462
476
  Get the EXIF metadata about an image URL as :py:class:`ocrd_models.ocrd_exif.OcrdExif`
@@ -467,19 +481,7 @@ class Workspace():
467
481
  Returns:
468
482
  :py:class:`ocrd_models.ocrd_exif.OcrdExif`
469
483
  """
470
- if not image_url:
471
- # avoid "finding" just any file
472
- raise ValueError(f"'image_url' must be a non-empty string, not '{image_url}' ({type(image_url)})")
473
- try:
474
- f = next(self.mets.find_files(local_filename=str(image_url)))
475
- return exif_from_filename(f.local_filename)
476
- except StopIteration:
477
- try:
478
- f = next(self.mets.find_files(url=str(image_url)))
479
- return exif_from_filename(self.download_file(f).local_filename)
480
- except StopIteration:
481
- with download_temporary_file(image_url) as f:
482
- return exif_from_filename(f.name)
484
+ return self._apply_mets_file(image_url, exif_from_filename)
483
485
 
484
486
  @deprecated(version='1.0.0', reason="Use workspace.image_from_page and workspace.image_from_segment")
485
487
  def resolve_image_as_pil(self, image_url, coords=None):
@@ -498,22 +500,9 @@ class Workspace():
498
500
  return self._resolve_image_as_pil(image_url, coords)
499
501
 
500
502
  def _resolve_image_as_pil(self, image_url, coords=None):
501
- if not image_url:
502
- # avoid "finding" just any file
503
- raise Exception("Cannot resolve empty image path")
504
503
  log = getLogger('ocrd.workspace._resolve_image_as_pil')
505
- with pushd_popd(self.directory):
506
- try:
507
- f = next(self.mets.find_files(local_filename=str(image_url)))
508
- pil_image = Image.open(f.local_filename)
509
- except StopIteration:
510
- try:
511
- f = next(self.mets.find_files(url=str(image_url)))
512
- pil_image = Image.open(self.download_file(f).local_filename)
513
- except StopIteration:
514
- with download_temporary_file(image_url) as f:
515
- pil_image = Image.open(f.name)
516
- pil_image.load() # alloc and give up the FD
504
+ pil_image = self._apply_mets_file(image_url, Image.open)
505
+ pil_image.load() # alloc and give up the FD
517
506
 
518
507
  # Pillow does not properly support higher color depths
519
508
  # (e.g. 16-bit or 32-bit or floating point grayscale),
@@ -788,16 +777,14 @@ class Workspace():
788
777
  raise Exception('Found no AlternativeImage that satisfies all requirements ' +
789
778
  'filename="%s" in page "%s"' % (
790
779
  filename, page_id))
791
- if not all(feature in page_coords['features']
792
- for feature in feature_selector.split(',') if feature):
793
- raise Exception('Found no AlternativeImage that satisfies all requirements ' +
794
- 'selector="%s" in page "%s"' % (
795
- feature_selector, page_id))
796
- if any(feature in page_coords['features']
797
- for feature in feature_filter.split(',') if feature):
798
- raise Exception('Found no AlternativeImage that satisfies all requirements ' +
799
- 'filter="%s" in page "%s"' % (
800
- feature_filter, page_id))
780
+ if (not all(feature in page_coords['features']
781
+ for feature in feature_selector.split(',') if feature) or
782
+ any(feature in page_coords['features']
783
+ for feature in feature_filter.split(',') if feature)):
784
+ raise Exception('Found no AlternativeImage that satisfies all requirements' +
785
+ ' selector="%s"' % feature_selector +
786
+ ' filter="%s"' % feature_filter +
787
+ ' in page "%s"' % page_id)
801
788
  # ensure DPI will be set in image meta-data again
802
789
  if 'DPI' in page_coords:
803
790
  dpi = page_coords['DPI']
@@ -1049,16 +1036,14 @@ class Workspace():
1049
1036
  raise Exception('Found no AlternativeImage that satisfies all requirements ' +
1050
1037
  'filename="%s" in segment "%s"' % (
1051
1038
  filename, segment.id))
1052
- if not all(feature in segment_coords['features']
1053
- for feature in feature_selector.split(',') if feature):
1039
+ if (not all(feature in segment_coords['features']
1040
+ for feature in feature_selector.split(',') if feature) or
1041
+ any(feature in segment_coords['features']
1042
+ for feature in feature_filter.split(',') if feature)):
1054
1043
  raise Exception('Found no AlternativeImage that satisfies all requirements' +
1055
- 'selector="%s" in segment "%s"' % (
1056
- feature_selector, segment.id))
1057
- if any(feature in segment_coords['features']
1058
- for feature in feature_filter.split(',') if feature):
1059
- raise Exception('Found no AlternativeImage that satisfies all requirements ' +
1060
- 'filter="%s" in segment "%s"' % (
1061
- feature_filter, segment.id))
1044
+ ' selector="%s"' % feature_selector +
1045
+ ' filter="%s"' % feature_filter +
1046
+ ' in segment "%s"' % segment.id)
1062
1047
  # ensure DPI will be set in image meta-data again
1063
1048
  if 'DPI' in segment_coords:
1064
1049
  dpi = segment_coords['DPI']
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocrd
3
- Version: 3.3.2
3
+ Version: 3.4.1
4
4
  Summary: OCR-D framework
5
5
  Author-email: Konstantin Baierer <unixprog@gmail.com>
6
6
  License: Apache License 2.0
@@ -1,32 +1,32 @@
1
1
  ocrd/__init__.py,sha256=ZswMVmlqFhAEIzMR3my6IKPq9XLH21aDPC_m_8Jh4dA,1076
2
2
  ocrd/constants.py,sha256=6dn3mG54WqHsKInmLZp4kJjNqqPtBoFoSuLUuRbOps0,740
3
3
  ocrd/lib.bash,sha256=le6XqAOEacdjP3JNSlPkxwRH1y0oVjNQM2tX5d6QFO4,10901
4
- ocrd/mets_server.py,sha256=cplsjQ5OMsBRDPLLbN-6qr3VETg6xOX21tdJOxwgCWk,22471
4
+ ocrd/mets_server.py,sha256=o01N5vAdPeu-xCgngcMRQm0bzBOhn_IFMV0AbeXDu9g,22491
5
5
  ocrd/ocrd-all-tool.json,sha256=EYXmMzP68p3KzL8nUZ16TCX2chQzKkAeISvuXqI_yIw,2094
6
6
  ocrd/resolver.py,sha256=A7BrZlUGrfJye-etaEuT-fdJFgvQcCxWovjufT-WmRY,15119
7
7
  ocrd/resource_list.yml,sha256=82-PiqkZnka1kTj3MQqNn4wXWKHHtoFchsQuetWuqFs,2633
8
8
  ocrd/resource_manager.py,sha256=kIWDoKxWH4IJE1gcoTcCRQjYjieCqiQclyuyF6Y9b8A,16813
9
9
  ocrd/task_sequence.py,sha256=spiaUQaMM7M8WdBDoQGmLuTPm7tOugYXD6rcJ2UXzxw,6991
10
- ocrd/workspace.py,sha256=-j3X83K0f4vtd5jwfu6_R53RJ2R8gt1HYpyrv8YP2bg,65661
10
+ ocrd/workspace.py,sha256=eLuGSJtOh3y2miKgcF8219YH1RkAaEi-qwXHarz8O8k,64916
11
11
  ocrd/workspace_backup.py,sha256=iab_JjZ_mMP-G8NIUk4PZmfpNlQuGRoqc3NbTSSew1w,3621
12
12
  ocrd/workspace_bagger.py,sha256=yU8H3xR5WmQKvgQewac71ie-DUWcfLnMS01D55zsEHQ,11971
13
- ocrd/cli/__init__.py,sha256=-BiwIakeCkWx0Jd2yX9_ahfdV4VKz_5yqGEJ_2zKakQ,2734
13
+ ocrd/cli/__init__.py,sha256=LpQb8ne1nzAq2j52lGWDTZlBCmrLwUsz17PTwJkWNcU,2884
14
14
  ocrd/cli/bashlib.py,sha256=ypFBM3-IULz_IEBx0Y04eGt9VbQWwEWm4ujm9g_hPWY,6009
15
15
  ocrd/cli/log.py,sha256=6_FrVmTKIIVNUaNLkuOJx8pvPhensHMuayJ0PA7T-XA,1562
16
16
  ocrd/cli/network.py,sha256=oWBHFEURxfUdb_t-F4svP_ri7o5mqBoNQnLZLbsZLTA,602
17
- ocrd/cli/ocrd_tool.py,sha256=EyD5VdLm2WTzQnR-hZKpn-D4-dsWr2PIE5IoY1O3mfE,7357
17
+ ocrd/cli/ocrd_tool.py,sha256=MLTqbtBCw8wBSScv2S4_xp5Jyz5fwD7BzUJxI9R4TBw,7653
18
18
  ocrd/cli/process.py,sha256=8KD0i7LT01H9u5CC1vktYMEVpS67da_rp_09_EOECmw,1233
19
19
  ocrd/cli/resmgr.py,sha256=mk8KZweC_7ENAFnC6FvFf7Zv_W1wqJTmk0EMd9XSvf4,10132
20
20
  ocrd/cli/validate.py,sha256=nvageDaHCETcE71X5lu7i_4JKpgo9MrvJKinVPLYUTI,5727
21
- ocrd/cli/workspace.py,sha256=bsp6YXEgwABIUFbSENmxV1c4oxRwc2L-BpeDPlYfhHE,40501
21
+ ocrd/cli/workspace.py,sha256=t40r3tnzz0VivhmT0HSeA5-2xdvUleRYvniqLIBGoWs,40501
22
22
  ocrd/cli/zip.py,sha256=MMJLw3OXWiJVfVtrdJcBkbB8vA1IzSautluazZRuCQ0,5910
23
23
  ocrd/decorators/__init__.py,sha256=n2Lb1WLXGlvPrhNTSGZYRqugpa__MZSWV546EmQnTtc,7678
24
24
  ocrd/decorators/loglevel_option.py,sha256=tgipROEu3t4hkwWvFssd80k2SbTBwBIC4WNE6Gc-XAg,798
25
25
  ocrd/decorators/mets_find_options.py,sha256=d4oATKMP6bFQHNqOK6nLqgUiWF2FYdkPvzkTVRMYpKo,635
26
- ocrd/decorators/ocrd_cli_options.py,sha256=lIvtE8re1VmpHm45u71ltE0QJS8nyd28HhLC7zGSvlo,2691
26
+ ocrd/decorators/ocrd_cli_options.py,sha256=psS7u42mXTOWIXQd9kcrgW7kDnFURHbmZ0946aqBz3A,2659
27
27
  ocrd/decorators/parameter_option.py,sha256=n8hYw7XVTd3i3tvpK8F1Jx_CqRp6EGF9qJVH95yj92Q,1076
28
28
  ocrd/processor/__init__.py,sha256=39ymNwYRdc-b_OJzzKmWCvo2ga3KdsGSYDHE1Hzkn_w,274
29
- ocrd/processor/base.py,sha256=eQzrwPcGxyo0y7jlAF2g5jOabB-jq_LuJfTn87Qxjwk,59929
29
+ ocrd/processor/base.py,sha256=_h0V5FevEPLb1q0zGtShuKXRj_tOWhD0M7_ufn34MPc,60476
30
30
  ocrd/processor/helpers.py,sha256=WFdC5zeB8F7T0FkpJwfTqWsSPNRtBCBUmFLgixw-rYs,10999
31
31
  ocrd/processor/ocrd_page_result.py,sha256=eDkpyVHcpaBzTHXiGrcNk9PP9Xr-XZru2w_uoX_ZeNA,510
32
32
  ocrd/processor/builtin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -51,9 +51,9 @@ ocrd_models/xpath_functions.py,sha256=AwR8tHf56-mmIksnw_GeOQ760sxNHqK92T7z9OfsEE
51
51
  ocrd_network/__init__.py,sha256=gMejC614J5PPGgXDKBiQS0jt-Jx8qOrLbWH7zt8x8Gs,374
52
52
  ocrd_network/client.py,sha256=rzqtamZ8krRRy-QTO-AeWH8Lr3HhRiQe2R1-Lovd40g,3020
53
53
  ocrd_network/client_utils.py,sha256=VVZMNBgGznh41exZ78S48X3DDwHcWTuOq-LNdxjRvak,5002
54
- ocrd_network/constants.py,sha256=IeNtcU6dqonDE3Zw83_61auhS8X2b8wsjAxYg1zvK-M,1902
54
+ ocrd_network/constants.py,sha256=AAcE6zZQNcNp2oqPD6oIgoVLSs4IHTkg8AS92WCQ6Xo,1968
55
55
  ocrd_network/database.py,sha256=fcft7vdRDoR7vmPL1xNYTIeOg5DwRPcggwYDYxLy5ik,10706
56
- ocrd_network/logging_utils.py,sha256=bO9TQqBXw9CIZEKp8tHXbeE2NuJWMiaQDHzS05b4ajo,2153
56
+ ocrd_network/logging_utils.py,sha256=ijWpM8B943Jx6F0NeK3ggni0198UYjM5NCkYpARLk_E,2472
57
57
  ocrd_network/param_validators.py,sha256=Jl1VwiPPKJ50k-xEHLdvW-1QDOkJHCiMz4k9Ipqm-Uc,1489
58
58
  ocrd_network/process_helpers.py,sha256=KpkInXsa5bgrxvTOouyMJ0NgJhaz0J9Gjs5sZHBcH64,2373
59
59
  ocrd_network/processing_server.py,sha256=qBiYk4wgTLqhHvbmDWu_F626BfSfyvkoCD-i0ZwsBSE,42109
@@ -89,14 +89,14 @@ ocrd_network/runtime_data/hosts.py,sha256=ml19ptzH4TFofyJR-Qp_Mn3sZUFbWoNe__rRXZ
89
89
  ocrd_network/runtime_data/network_agents.py,sha256=5p_zKLqECBIHLw-Ya6eKcKSZcUM4ESiipEIphVxHBEA,5192
90
90
  ocrd_network/runtime_data/network_services.py,sha256=xrPpFUU_Pa-XzGe2FEt5RmO17xqykIUmTr_9g6S7XSs,7892
91
91
  ocrd_utils/__init__.py,sha256=U_zAQJwxg_aJ4CR84CKMNAUP6Cob8Er8Ikj42JmnUKo,5977
92
- ocrd_utils/config.py,sha256=1_8j4kpKK5gxhFKObbBEzmq4JazTtKEkGe14Ch_tpw0,11796
92
+ ocrd_utils/config.py,sha256=PGRnqrDT7lWJCd9ezoAEBJ5CyVJfFnvCEPTLjtKduOQ,12143
93
93
  ocrd_utils/constants.py,sha256=ImbG1d8t2MW3uuFi-mN6aY90Zn74liAKZBKlfuKN86w,3278
94
94
  ocrd_utils/deprecate.py,sha256=4i50sZsA3Eevqn5D-SL5yGf9KEZfGCV4A5Anzn1GRMs,1026
95
95
  ocrd_utils/image.py,sha256=zNNX1cnRy6yvrxx8mnYQiqWraAh5-i4a1AOfCCg4SmI,24781
96
96
  ocrd_utils/introspect.py,sha256=gfBlmeEFuRmRUSgdSK0jOxRpYqDRXl2IAE6gv2MZ6as,1977
97
- ocrd_utils/logging.py,sha256=sHNfih9kBfvKsHdLqMK_ew9Pld1GsRyYlrZHIySujnw,7313
97
+ ocrd_utils/logging.py,sha256=XYTL7DxUvdX4V56jhAYH6PkhjMFOmaa0kf_XkhSTTe0,7816
98
98
  ocrd_utils/ocrd_logging.conf,sha256=JlWmA_5vg6HnjPGjTC4mA5vFHqmnEinwllSTiOw5CCo,3473
99
- ocrd_utils/os.py,sha256=acRRdDBI8L6BK0Mf773yKEzwdpZSFRBJEKB2crL4EjU,9865
99
+ ocrd_utils/os.py,sha256=tMjikpVXJ8sCgYBOrgjgT3vlR2Pok39nSKysYc6mUQ4,9863
100
100
  ocrd_utils/str.py,sha256=cRgqYILDGOAqWr0qrCrV52I3y4wvpwDVtnBGEUjXNS4,10116
101
101
  ocrd_validators/__init__.py,sha256=ZFc-UqRVBk9o1YesZFmr9lOepttNJ_NKx1Zdb7g_YsU,972
102
102
  ocrd_validators/bagit-profile.yml,sha256=sdQJlSi7TOn1E9WYMOZ1shewJ-i_nPaKmsAFkh28TGY,1011
@@ -120,9 +120,9 @@ ocrd_validators/xlink.xsd,sha256=8fW7YAMWXN2PbB_MMvj9H5ZeFoEBDzuYBtlGC8_6ijw,318
120
120
  ocrd_validators/xsd_mets_validator.py,sha256=4GWfLyqkmca0x7osDuXuExYuM0HWVrKoqn0S35sFhHU,467
121
121
  ocrd_validators/xsd_page_validator.py,sha256=BNz_9u-Ek4UCeyZu3KxSQoolfW9lvuaSR9nIu1XXxeE,467
122
122
  ocrd_validators/xsd_validator.py,sha256=6HrVAf6SzCvfUIuQdIzz9bOq4V-zhyii9yrUPoK2Uvo,2094
123
- ocrd-3.3.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
124
- ocrd-3.3.2.dist-info/METADATA,sha256=ovylNGJP1F1XfiV0cgYS1MXdkJr0YkTWJp9GOrvFYvk,10442
125
- ocrd-3.3.2.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
126
- ocrd-3.3.2.dist-info/entry_points.txt,sha256=4hcJ2LkK_OlIabHnKgFit35Ap7b5Lz1Gb4hzkxV0Kiw,152
127
- ocrd-3.3.2.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
128
- ocrd-3.3.2.dist-info/RECORD,,
123
+ ocrd-3.4.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
124
+ ocrd-3.4.1.dist-info/METADATA,sha256=bNMeKlfpm41iZkct1OWhmw-aPP1GbuvsB1wXksYq7_Q,10442
125
+ ocrd-3.4.1.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
126
+ ocrd-3.4.1.dist-info/entry_points.txt,sha256=4hcJ2LkK_OlIabHnKgFit35Ap7b5Lz1Gb4hzkxV0Kiw,152
127
+ ocrd-3.4.1.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
128
+ ocrd-3.4.1.dist-info/RECORD,,
ocrd_network/constants.py CHANGED
@@ -11,12 +11,16 @@ OCRD_ALL_TOOL_JSON = "ocrd-all-tool.json"
11
11
  SERVER_ALL_PAGES_PLACEHOLDER = "all_pages"
12
12
 
13
13
 
14
- class AgentType(str, Enum):
14
+ class StrEnum(str, Enum):
15
+ def __str__(self):
16
+ return self.value
17
+
18
+ class AgentType(StrEnum):
15
19
  PROCESSING_WORKER = "worker"
16
20
  PROCESSOR_SERVER = "server"
17
21
 
18
22
 
19
- class DeployType(str, Enum):
23
+ class DeployType(StrEnum):
20
24
  # Deployed by the Processing Server config file
21
25
  DOCKER = "docker"
22
26
  NATIVE = "native"
@@ -26,7 +30,7 @@ class DeployType(str, Enum):
26
30
 
27
31
 
28
32
  # TODO: Make the states uppercase
29
- class JobState(str, Enum):
33
+ class JobState(StrEnum):
30
34
  # The processing job is cached inside the Processing Server requests cache
31
35
  cached = "CACHED"
32
36
  # The processing job was cancelled due to failed dependencies
@@ -43,7 +47,7 @@ class JobState(str, Enum):
43
47
  unset = "UNSET"
44
48
 
45
49
 
46
- class NetworkLoggingDirs(str, Enum):
50
+ class NetworkLoggingDirs(StrEnum):
47
51
  METS_SERVERS = "mets_servers"
48
52
  PROCESSING_JOBS = "processing_jobs"
49
53
  PROCESSING_SERVERS = "processing_servers"
@@ -51,7 +55,7 @@ class NetworkLoggingDirs(str, Enum):
51
55
  PROCESSOR_SERVERS = "processor_servers"
52
56
 
53
57
 
54
- class ServerApiTags(str, Enum):
58
+ class ServerApiTags(StrEnum):
55
59
  ADMIN = "admin"
56
60
  DISCOVERY = "discovery"
57
61
  PROCESSING = "processing"
@@ -9,11 +9,22 @@ def configure_file_handler_with_formatter(logger: Logger, log_file: Path, mode:
9
9
  file_handler = FileHandler(filename=log_file, mode=mode)
10
10
  file_handler.setFormatter(Formatter(LOG_FORMAT))
11
11
  logger.addHandler(file_handler)
12
+ try:
13
+ log_file.chmod(0o666)
14
+ except PermissionError:
15
+ # if the file exists the permissions are supposed to already fit
16
+ pass
12
17
 
13
18
 
14
19
  def get_root_logging_dir(module_name: NetworkLoggingDirs) -> Path:
15
20
  module_log_dir = Path(config.OCRD_NETWORK_LOGS_ROOT_DIR, module_name.value)
16
- module_log_dir.mkdir(parents=True, exist_ok=True)
21
+ try:
22
+ module_log_dir.mkdir(parents=True, exist_ok=True)
23
+ module_log_dir.chmod(0o777)
24
+ except PermissionError:
25
+ # if the folder exists the permissions are supposed to already fit
26
+ pass
27
+
17
28
  return module_log_dir
18
29
 
19
30
 
ocrd_utils/config.py CHANGED
@@ -277,13 +277,23 @@ config.add(name="OCRD_NETWORK_SOCKETS_ROOT_DIR",
277
277
  description="The root directory where all mets server related socket files are created",
278
278
  parser=lambda val: Path(val),
279
279
  default=(True, Path(gettempdir(), "ocrd_network_sockets")))
280
- config.OCRD_NETWORK_SOCKETS_ROOT_DIR.mkdir(parents=True, exist_ok=True)
280
+ config.OCRD_NETWORK_SOCKETS_ROOT_DIR.mkdir(mode=0o777, parents=True, exist_ok=True)
281
+ try:
282
+ config.OCRD_NETWORK_SOCKETS_ROOT_DIR.chmod(0o777)
283
+ except PermissionError:
284
+ # if the folder exists the permissions are supposed to already fit
285
+ pass
281
286
 
282
287
  config.add(name="OCRD_NETWORK_LOGS_ROOT_DIR",
283
288
  description="The root directory where all ocrd_network related file logs are stored",
284
289
  parser=lambda val: Path(val),
285
290
  default=(True, Path(gettempdir(), "ocrd_network_logs")))
286
- config.OCRD_NETWORK_LOGS_ROOT_DIR.mkdir(parents=True, exist_ok=True)
291
+ config.OCRD_NETWORK_LOGS_ROOT_DIR.mkdir(mode=0o777, parents=True, exist_ok=True)
292
+ try:
293
+ config.OCRD_NETWORK_LOGS_ROOT_DIR.chmod(0o777)
294
+ except PermissionError:
295
+ # if the folder exists the permissions are supposed to already fit
296
+ pass
287
297
 
288
298
  config.add("HOME",
289
299
  description="Directory to look for `ocrd_logging.conf`, fallback for unset XDG variables.",
ocrd_utils/logging.py CHANGED
@@ -32,6 +32,7 @@ import logging
32
32
  import logging.config
33
33
  from pathlib import Path
34
34
  import sys
35
+ from os import chmod
35
36
 
36
37
  from .constants import LOG_FORMAT, LOG_TIMEFMT
37
38
  from .config import config
@@ -166,6 +167,15 @@ def initLogging(builtin_only=False, force_reinit=False, silent=not config.OCRD_L
166
167
  if not silent:
167
168
  print(f"[LOGGING] Picked up logging config at {config_file}", file=sys.stderr)
168
169
  logging.config.fileConfig(config_file)
170
+ # Set permission of processing-server logfile to 666 to prevent possible permission errors while logging
171
+ try:
172
+ network_logger = logging.getLogger("ocrd_network")
173
+ for handler in network_logger.handlers:
174
+ if isinstance(handler, logging.FileHandler):
175
+ chmod(handler.baseFilename, 0o666)
176
+ except PermissionError:
177
+ # if the file exists the permissions are supposed to already fit
178
+ pass
169
179
  else:
170
180
  if not silent:
171
181
  print("[LOGGING] Initializing logging with built-in defaults", file=sys.stderr)
ocrd_utils/os.py CHANGED
@@ -254,7 +254,7 @@ def guess_media_type(input_file : str, fallback : str = None, application_xml :
254
254
  if mimetype is None:
255
255
  mimetype = EXT_TO_MIME.get(''.join(Path(input_file).suffixes), fallback)
256
256
  if mimetype is None:
257
- raise ValueError("Could not determine MIME type of input_file must")
257
+ raise ValueError(f"Could not determine MIME type of {input_file}")
258
258
  if mimetype == 'application/xml':
259
259
  mimetype = application_xml
260
260
  return mimetype
File without changes
File without changes