ocrd 3.0.0b6__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ocrd/cli/__init__.py CHANGED
@@ -16,7 +16,7 @@ def command_with_replaced_help(*replacements):
16
16
 
17
17
  class CommandWithReplacedHelp(click.Command):
18
18
  def get_help(self, ctx):
19
- newhelp = super().get_help(ctx)
19
+ newhelp : str = super().get_help(ctx)
20
20
  for replacement in replacements:
21
21
  newhelp = re.sub(*replacement, newhelp)
22
22
  # print(newhelp)
@@ -83,6 +83,8 @@ Variables:
83
83
  \b
84
84
  {config.describe('OCRD_NETWORK_RABBITMQ_CLIENT_CONNECT_ATTEMPTS')}
85
85
  \b
86
+ {config.describe('OCRD_NETWORK_RABBITMQ_HEARTBEAT')}
87
+ \b
86
88
  {config.describe('OCRD_PROFILE_FILE')}
87
89
  \b
88
90
  {config.describe('OCRD_PROFILE', wrap_text=False)}
@@ -48,6 +48,9 @@ def ocrd_cli_wrap_processor(
48
48
  # ocrd_network params end #
49
49
  **kwargs
50
50
  ):
51
+ # init logging handlers so no imported libs can preempt ours
52
+ initLogging()
53
+
51
54
  # FIXME: remove workspace arg entirely
52
55
  processor = processorClass(None)
53
56
  if not sys.argv[1:]:
@@ -89,8 +92,6 @@ def ocrd_cli_wrap_processor(
89
92
  # Used for checking/starting network agents for the WebAPI architecture
90
93
  check_and_run_network_agent(processorClass, subcommand, address, database, queue)
91
94
 
92
- # from here: single-run processing context
93
- initLogging()
94
95
  if 'parameter' in kwargs:
95
96
  # Disambiguate parameter file/literal, and resolve file
96
97
  def resolve(name):
ocrd/mets_server.py CHANGED
@@ -1,8 +1,10 @@
1
1
  """
2
2
  # METS server functionality
3
3
  """
4
+ import os
4
5
  import re
5
6
  from os import _exit, chmod
7
+ import signal
6
8
  from typing import Dict, Optional, Union, List, Tuple
7
9
  from time import sleep
8
10
  from pathlib import Path
@@ -155,13 +157,13 @@ class ClientSideOcrdMets:
155
157
  Request writing the changes to the file system
156
158
  """
157
159
  if not self.multiplexing_mode:
158
- self.session.request("PUT", url=self.url)
160
+ return self.session.request("PUT", url=self.url).text
159
161
  else:
160
- self.session.request(
162
+ return self.session.request(
161
163
  "POST",
162
164
  self.url,
163
165
  json=MpxReq.save(self.ws_dir_path)
164
- )
166
+ ).json()["text"]
165
167
 
166
168
  def stop(self):
167
169
  """
@@ -169,14 +171,13 @@ class ClientSideOcrdMets:
169
171
  """
170
172
  try:
171
173
  if not self.multiplexing_mode:
172
- self.session.request("DELETE", self.url)
173
- return
174
+ return self.session.request("DELETE", self.url).text
174
175
  else:
175
- self.session.request(
176
+ return self.session.request(
176
177
  "POST",
177
178
  self.url,
178
179
  json=MpxReq.stop(self.ws_dir_path)
179
- )
180
+ ).json()["text"]
180
181
  except ConnectionError:
181
182
  # Expected because we exit the process without returning
182
183
  pass
@@ -323,7 +324,7 @@ class ClientSideOcrdMets:
323
324
 
324
325
 
325
326
  class MpxReq:
326
- """This class wrapps the request bodies needed for the tcp forwarding
327
+ """This class wraps the request bodies needed for the tcp forwarding
327
328
 
328
329
  For every mets-server-call like find_files or workspace_path a special request_body is
329
330
  needed to call `MetsServerProxy.forward_tcp_request`. These are created by this functions.
@@ -346,12 +347,12 @@ class MpxReq:
346
347
  @staticmethod
347
348
  def save(ws_dir_path: str) -> Dict:
348
349
  return MpxReq.__args_wrapper(
349
- ws_dir_path, method_type="PUT", response_type="empty", request_url="", request_data={})
350
+ ws_dir_path, method_type="PUT", response_type="text", request_url="", request_data={})
350
351
 
351
352
  @staticmethod
352
353
  def stop(ws_dir_path: str) -> Dict:
353
354
  return MpxReq.__args_wrapper(
354
- ws_dir_path, method_type="DELETE", response_type="empty", request_url="", request_data={})
355
+ ws_dir_path, method_type="DELETE", response_type="text", request_url="", request_data={})
355
356
 
356
357
  @staticmethod
357
358
  def reload(ws_dir_path: str) -> Dict:
@@ -428,18 +429,24 @@ class OcrdMetsServer:
428
429
 
429
430
  @staticmethod
430
431
  def kill_process(mets_server_pid: int):
431
- subprocess_run(args=["kill", "-s", "SIGINT", f"{mets_server_pid}"], shell=False, universal_newlines=True)
432
+ os.kill(mets_server_pid, signal.SIGINT)
433
+ sleep(3)
434
+ try:
435
+ os.kill(mets_server_pid, signal.SIGKILL)
436
+ except ProcessLookupError as e:
437
+ pass
432
438
 
433
439
  def shutdown(self):
440
+ pid = os.getpid()
441
+ self.log.info(f"Shutdown method of mets server[{pid}] invoked, sending SIGTERM signal.")
442
+ os.kill(pid, signal.SIGTERM)
434
443
  if self.is_uds:
435
444
  if Path(self.url).exists():
436
- self.log.debug(f'UDS socket {self.url} still exists, removing it')
445
+ self.log.warning(f"Due to a server shutdown, removing the existing UDS socket file: {self.url}")
437
446
  Path(self.url).unlink()
438
- # os._exit because uvicorn catches SystemExit raised by sys.exit
439
- _exit(0)
440
447
 
441
448
  def startup(self):
442
- self.log.info("Starting up METS server")
449
+ self.log.info(f"Configuring the Mets Server")
443
450
 
444
451
  workspace = self.workspace
445
452
 
@@ -465,32 +472,49 @@ class OcrdMetsServer:
465
472
  """
466
473
  Write current changes to the file system
467
474
  """
468
- return workspace.save_mets()
475
+ workspace.save_mets()
476
+ response = Response(content="The Mets Server is writing changes to disk.", media_type='text/plain')
477
+ self.log.info(f"PUT / -> {response.__dict__}")
478
+ return response
469
479
 
470
480
  @app.delete(path='/')
471
- async def stop():
481
+ def stop():
472
482
  """
473
483
  Stop the mets server
474
484
  """
475
- getLogger('ocrd.models.ocrd_mets').info(f'Shutting down METS Server {self.url}')
476
485
  workspace.save_mets()
486
+ response = Response(content="The Mets Server will shut down soon...", media_type='text/plain')
477
487
  self.shutdown()
488
+ self.log.info(f"DELETE / -> {response.__dict__}")
489
+ return response
478
490
 
479
491
  @app.post(path='/reload')
480
- async def workspace_reload_mets():
492
+ def workspace_reload_mets():
481
493
  """
482
494
  Reload mets file from the file system
483
495
  """
484
496
  workspace.reload_mets()
485
- return Response(content=f'Reloaded from {workspace.directory}', media_type="text/plain")
497
+ response = Response(content=f"Reloaded from {workspace.directory}", media_type='text/plain')
498
+ self.log.info(f"POST /reload -> {response.__dict__}")
499
+ return response
486
500
 
487
501
  @app.get(path='/unique_identifier', response_model=str)
488
502
  async def unique_identifier():
489
- return Response(content=workspace.mets.unique_identifier, media_type='text/plain')
503
+ response = Response(content=workspace.mets.unique_identifier, media_type='text/plain')
504
+ self.log.info(f"GET /unique_identifier -> {response.__dict__}")
505
+ return response
490
506
 
491
507
  @app.get(path='/workspace_path', response_model=str)
492
508
  async def workspace_path():
493
- return Response(content=workspace.directory, media_type="text/plain")
509
+ response = Response(content=workspace.directory, media_type="text/plain")
510
+ self.log.info(f"GET /workspace_path -> {response.__dict__}")
511
+ return response
512
+
513
+ @app.get(path='/physical_pages', response_model=OcrdPageListModel)
514
+ async def physical_pages():
515
+ response = {'physical_pages': workspace.mets.physical_pages}
516
+ self.log.info(f"GET /physical_pages -> {response}")
517
+ return response
494
518
 
495
519
  @app.get(path='/physical_pages', response_model=OcrdPageListModel)
496
520
  async def physical_pages():
@@ -498,18 +522,24 @@ class OcrdMetsServer:
498
522
 
499
523
  @app.get(path='/file_groups', response_model=OcrdFileGroupListModel)
500
524
  async def file_groups():
501
- return {'file_groups': workspace.mets.file_groups}
525
+ response = {'file_groups': workspace.mets.file_groups}
526
+ self.log.info(f"GET /file_groups -> {response}")
527
+ return response
502
528
 
503
529
  @app.get(path='/agent', response_model=OcrdAgentListModel)
504
530
  async def agents():
505
- return OcrdAgentListModel.create(workspace.mets.agents)
531
+ response = OcrdAgentListModel.create(workspace.mets.agents)
532
+ self.log.info(f"GET /agent -> {response.__dict__}")
533
+ return response
506
534
 
507
535
  @app.post(path='/agent', response_model=OcrdAgentModel)
508
536
  async def add_agent(agent: OcrdAgentModel):
509
537
  kwargs = agent.dict()
510
538
  kwargs['_type'] = kwargs.pop('type')
511
539
  workspace.mets.add_agent(**kwargs)
512
- return agent
540
+ response = agent
541
+ self.log.info(f"POST /agent -> {response.__dict__}")
542
+ return response
513
543
 
514
544
  @app.get(path="/file", response_model=OcrdFileListModel)
515
545
  async def find_files(
@@ -526,7 +556,9 @@ class OcrdMetsServer:
526
556
  found = workspace.mets.find_all_files(
527
557
  fileGrp=file_grp, ID=file_id, pageId=page_id, mimetype=mimetype, local_filename=local_filename, url=url
528
558
  )
529
- return OcrdFileListModel.create(found)
559
+ response = OcrdFileListModel.create(found)
560
+ self.log.info(f"GET /file -> {response.__dict__}")
561
+ return response
530
562
 
531
563
  @app.post(path='/file', response_model=OcrdFileModel)
532
564
  async def add_file(
@@ -549,7 +581,9 @@ class OcrdMetsServer:
549
581
  # Add to workspace
550
582
  kwargs = file_resource.dict()
551
583
  workspace.add_file(**kwargs, force=force)
552
- return file_resource
584
+ response = file_resource
585
+ self.log.info(f"POST /file -> {response.__dict__}")
586
+ return response
553
587
 
554
588
  # ------------- #
555
589
 
@@ -557,9 +591,6 @@ class OcrdMetsServer:
557
591
  # Create socket and change to world-readable and -writable to avoid permission errors
558
592
  self.log.debug(f"chmod 0o677 {self.url}")
559
593
  server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
560
- if Path(self.url).exists() and not is_socket_in_use(self.url):
561
- # remove leftover unused socket which blocks startup
562
- Path(self.url).unlink()
563
594
  server.bind(self.url) # creates the socket file
564
595
  atexit.register(self.shutdown)
565
596
  server.close()
@@ -571,16 +602,5 @@ class OcrdMetsServer:
571
602
  uvicorn_kwargs['log_config'] = None
572
603
  uvicorn_kwargs['access_log'] = False
573
604
 
574
- self.log.debug("Starting uvicorn")
605
+ self.log.info("Starting the uvicorn Mets Server")
575
606
  uvicorn.run(app, **uvicorn_kwargs)
576
-
577
-
578
- def is_socket_in_use(socket_path):
579
- if Path(socket_path).exists():
580
- client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
581
- try:
582
- client.connect(socket_path)
583
- except OSError:
584
- return False
585
- client.close()
586
- return True
ocrd/processor/base.py CHANGED
@@ -18,10 +18,11 @@ from os import getcwd
18
18
  from pathlib import Path
19
19
  from typing import Any, Dict, List, Optional, Tuple, Union, get_args
20
20
  import sys
21
+ import logging
22
+ import logging.handlers
21
23
  import inspect
22
24
  import tarfile
23
25
  import io
24
- import weakref
25
26
  from collections import defaultdict
26
27
  from frozendict import frozendict
27
28
  # concurrent.futures is buggy in py38,
@@ -129,7 +130,8 @@ class DummyExecutor:
129
130
  def __init__(self, initializer=None, initargs=(), **kwargs):
130
131
  initializer(*initargs)
131
132
  def shutdown(self, **kwargs):
132
- pass
133
+ # allow gc to catch processor instance (unless cached)
134
+ _page_worker_set_ctxt(None, None)
133
135
  def submit(self, fn, *args, **kwargs) -> DummyFuture:
134
136
  return DummyFuture(fn, *args, **kwargs)
135
137
 
@@ -158,12 +160,12 @@ class Processor():
158
160
 
159
161
  max_workers : int = -1
160
162
  """
161
- maximum number of processor threads for page-parallel processing (ignored if negative),
163
+ maximum number of processor forks for page-parallel processing (ignored if negative),
162
164
  to be applied on top of :py:data:`~ocrd_utils.config.OCRD_MAX_PARALLEL_PAGES` (i.e.
163
165
  whatever is smaller).
164
166
 
165
167
  (Override this if you know how many pages fit into processing units - GPU shaders / CPU cores
166
- - at once, or if your class is not thread-safe.)
168
+ - at once, or if your class already creates threads prior to forking, e.g. during ``setup``.)
167
169
  """
168
170
 
169
171
  max_page_seconds : int = -1
@@ -366,12 +368,14 @@ class Processor():
366
368
  self._base_logger = getLogger('ocrd.processor.base')
367
369
  if parameter is not None:
368
370
  self.parameter = parameter
369
- # ensure that shutdown gets called at destruction
370
- self._finalizer = weakref.finalize(self, self.shutdown)
371
371
  # workaround for deprecated#72 (@deprecated decorator does not work for subclasses):
372
372
  setattr(self, 'process',
373
373
  deprecated(version='3.0', reason='process() should be replaced with process_page_pcgts() or process_page_file() or process_workspace()')(getattr(self, 'process')))
374
374
 
375
+ def __del__(self):
376
+ self._base_logger.debug("shutting down %s in %s", repr(self), mp.current_process().name)
377
+ self.shutdown()
378
+
375
379
  def show_help(self, subcommand=None):
376
380
  """
377
381
  Print a usage description including the standard CLI and all of this processor's ocrd-tool
@@ -502,7 +506,7 @@ class Processor():
502
506
  # set up multitasking
503
507
  max_workers = max(0, config.OCRD_MAX_PARALLEL_PAGES)
504
508
  if self.max_workers > 0 and self.max_workers < config.OCRD_MAX_PARALLEL_PAGES:
505
- self._base_logger.info("limiting number of threads from %d to %d", max_workers, self.max_workers)
509
+ self._base_logger.info("limiting number of workers from %d to %d", max_workers, self.max_workers)
506
510
  max_workers = self.max_workers
507
511
  if max_workers > 1:
508
512
  assert isinstance(workspace.mets, ClientSideOcrdMets), \
@@ -514,22 +518,31 @@ class Processor():
514
518
 
515
519
  if max_workers > 1:
516
520
  executor_cls = ProcessPoolExecutor
521
+ log_queue = mp.Queue()
517
522
  else:
518
523
  executor_cls = DummyExecutor
524
+ log_queue = None
519
525
  executor = executor_cls(
520
526
  max_workers=max_workers or 1,
521
527
  # only forking method avoids pickling
522
528
  context=mp.get_context('fork'),
523
529
  # share processor instance as global to avoid pickling
524
530
  initializer=_page_worker_set_ctxt,
525
- initargs=(self,),
531
+ initargs=(self, log_queue),
526
532
  )
533
+ if max_workers > 1:
534
+ # forward messages from log queue (in subprocesses) to all root handlers
535
+ log_listener = logging.handlers.QueueListener(log_queue, *logging.root.handlers, respect_handler_level=True)
536
+ log_listener.start()
527
537
  try:
528
538
  self._base_logger.debug("started executor %s with %d workers", str(executor), max_workers or 1)
529
539
  tasks = self.process_workspace_submit_tasks(executor, max_seconds)
530
540
  stats = self.process_workspace_handle_tasks(tasks)
531
541
  finally:
532
542
  executor.shutdown(kill_workers=True, wait=False)
543
+ if max_workers > 1:
544
+ log_listener.stop()
545
+ del log_listener
533
546
 
534
547
  except NotImplementedError:
535
548
  # fall back to deprecated method
@@ -1109,13 +1122,16 @@ in Processor.process_workspace. Forking allows inheriting global
1109
1122
  objects, and with the METS Server we do not mutate the local
1110
1123
  processor instance anyway.
1111
1124
  """
1112
- def _page_worker_set_ctxt(processor):
1125
+ def _page_worker_set_ctxt(processor, log_queue):
1113
1126
  """
1114
1127
  Overwrites `ocrd.processor.base._page_worker_processor` instance
1115
1128
  for sharing with subprocesses in ProcessPoolExecutor initializer.
1116
1129
  """
1117
1130
  global _page_worker_processor
1118
1131
  _page_worker_processor = processor
1132
+ if log_queue:
1133
+ # replace all log handlers with just one queue handler
1134
+ logging.root.handlers = [logging.handlers.QueueHandler(log_queue)]
1119
1135
 
1120
1136
  def _page_worker(timeout, *input_files):
1121
1137
  """
@@ -16,6 +16,26 @@
16
16
  "description": "Whether to actually copy files (true) or just create PAGE-XML as a side effect (false)"
17
17
  }
18
18
  }
19
+ },
20
+ "ocrd-filter": {
21
+ "executable": "ocrd-filter",
22
+ "description": "Bare-bones processor can be dynamically configured to remove segments based on XPath queries",
23
+ "steps": ["recognition/post-correction"],
24
+ "categories": ["Quality assurance"],
25
+ "input_file_grp_cardinality": 1,
26
+ "output_file_grp_cardinality": 1,
27
+ "parameters": {
28
+ "select": {
29
+ "type": "string",
30
+ "default": "//*[ends-with(local-name(),'Region')]",
31
+ "description": "Which segments to select for removal. An XPath 2.0 query expression (path and optional predicates), with 'pc' as namespace prefix for PAGE-XML and our extension functions (see help text). Only selection of segment hierarchy elements is allowed (so e.g. `*` would be equivalent to `pc:NoiseRegion|pc:LineDrawingRegion|pc:AdvertRegion|pc:ImageRegion|pc:ChartRegion|pc:MusicRegion|pc:GraphicRegion|pc:UnknownRegion|pc:CustomRegion|pc:SeparatorRegion|pc:MathsRegion|pc:TextRegion|pc:MapRegion|pc:ChemRegion|pc:TableRegion|pc:TextLine|pc:Word|pc:Glyph`, but `pc:MetadataItem` or `pc:Border` or `pc:Coords` would not match).\nFor example, to remove words or glyphs with low text confidence, select '(pc:Word|pc:Glyph)[pc:TextEquiv/@conf < 0.7]'. Or low layout confidence, '*[pc:Coords/@conf < 0.7]'.\nTo remove high pixel-to-character rate, select '*[pc:pixelarea(.) div string-length(pc:textequiv(.)) > 10000]'."
32
+ },
33
+ "plot": {
34
+ "type": "boolean",
35
+ "default": false,
36
+ "description": "Whether to extract an image for each filtered segment and write to the output fileGrp."
37
+ }
38
+ }
19
39
  }
20
40
  }
21
41
  }
@@ -13,9 +13,6 @@ from ocrd_utils import (
13
13
  make_file_id,
14
14
  MIME_TO_EXT,
15
15
  MIMETYPE_PAGE,
16
- parse_json_string_with_comments,
17
- resource_string,
18
- config
19
16
  )
20
17
  from ocrd_modelfactory import page_from_file
21
18
 
@@ -0,0 +1,108 @@
1
+ # pylint: disable=missing-module-docstring,invalid-name
2
+ from typing import Optional
3
+
4
+ from lxml import etree
5
+ import click
6
+
7
+ from ocrd import Processor, OcrdPageResult, OcrdPageResultImage
8
+ from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
9
+ from ocrd_models import OcrdPage
10
+
11
+ _SEGTYPES = [
12
+ "NoiseRegion",
13
+ "LineDrawingRegion",
14
+ "AdvertRegion",
15
+ "ImageRegion",
16
+ "ChartRegion",
17
+ "MusicRegion",
18
+ "GraphicRegion",
19
+ "UnknownRegion",
20
+ "CustomRegion",
21
+ "SeparatorRegion",
22
+ "MathsRegion",
23
+ "TextRegion",
24
+ "MapRegion",
25
+ "ChemRegion",
26
+ "TableRegion",
27
+ "TextLine",
28
+ "Word",
29
+ "Glyph"
30
+ ]
31
+
32
+ class FilterProcessor(Processor):
33
+ def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult:
34
+ """
35
+ Remove PAGE segment hierarchy elements based on flexible selection criteria.
36
+
37
+ Open and deserialise PAGE input file, then iterate over the segment hierarchy
38
+ down to the level required for ``select`` (which could be multiple levels at once).
39
+
40
+ Remove any segments matching XPath query ``select`` from that hierarchy (and from
41
+ the `ReadingOrder` if it is a region type).
42
+
43
+ \b
44
+ Besides full XPath 2.0 syntax, this supports extra predicates:
45
+ - `pc:pixelarea()` for the number of pixels of the bounding box (or sum area on node sets),
46
+ - `pc:textequiv()` for the first TextEquiv unicode string (or concatenated string on node sets).
47
+
48
+ If ``plot`` is `true`, then extract and write an image file for all removed segments
49
+ to the output fileGrp (without reference to the PAGE).
50
+
51
+ Produce a new PAGE output file by serialising the resulting hierarchy.
52
+ """
53
+ pcgts = input_pcgts[0]
54
+ result = OcrdPageResult(pcgts)
55
+ nodes = pcgts.xpath(self.parameter['select'])
56
+ # get PAGE objects from matching etree nodes
57
+ # but allow only hierarchy segments
58
+ segments = [segment for segment in map(pcgts.revmap.get, nodes)
59
+ if segment.__class__.__name__.replace('Type', '') in _SEGTYPES]
60
+ if not(len(segments)):
61
+ self.logger.info("no matches")
62
+ return result
63
+ rodict = pcgts.get_Page().get_ReadingOrderGroups()
64
+ if self.parameter['plot']:
65
+ page_image, page_coords, _ = self.workspace.image_from_page(pcgts.get_Page(), page_id)
66
+ for segment in segments:
67
+ segtype = segment.original_tagname_
68
+ self.logger.info("matched %s segment %s", segtype, segment.id)
69
+ parent = segment.parent_object_
70
+ partype = parent.__class__.__name__.replace('Type', '')
71
+ if partype == 'Page':
72
+ getattr(parent, 'get_' + segtype)().remove(segment)
73
+ elif partype.endswith('Region'):
74
+ if segtype.endswith('Region'):
75
+ getattr(parent, 'get_' + segtype)().remove(segment)
76
+ else:
77
+ parent.TextLine.remove(segment)
78
+ elif partype == 'TextLine':
79
+ parent.Word.remove(segment)
80
+ elif partype == 'Word':
81
+ parent.Glyph.remove(segment)
82
+ else:
83
+ raise Exception(f"unexpected type ({partype}) of parent for matched segment ({segtype})")
84
+ segment.parent_object_ = None
85
+ if segtype.endswith('Region') and segment.id in rodict:
86
+ # remove from ReadingOrder as well
87
+ roelem = rodict[segment.id]
88
+ rorefs = getattr(roelem.parent_object_, roelem.__class__.__name__.replace('Type', ''))
89
+ rorefs.remove(roelem)
90
+ roelem.parent_object_ = None
91
+ del rodict[segment.id]
92
+ if self.parameter['plot']:
93
+ segment_image, _ = self.workspace.image_from_segment(segment, page_image, page_coords)
94
+ result.images.append(OcrdPageResultImage(segment_image, segment.id + '.IMG', None))
95
+ return result
96
+
97
+ @property
98
+ def metadata_filename(self):
99
+ return 'processor/builtin/dummy/ocrd-tool.json'
100
+
101
+ @property
102
+ def executable(self):
103
+ return 'ocrd-filter'
104
+
105
+ @click.command()
106
+ @ocrd_cli_options
107
+ def cli(*args, **kwargs):
108
+ return ocrd_cli_wrap_processor(FilterProcessor, *args, **kwargs)
ocrd/resource_manager.py CHANGED
@@ -23,6 +23,10 @@ yaml.constructor.SafeConstructor.yaml_constructors['tag:yaml.org,2002:timestamp'
23
23
 
24
24
  # pylint: enable=wrong-import-position
25
25
 
26
+ # pylint: enable=wrong-import-position
27
+
28
+ # pylint: enable=wrong-import-position
29
+
26
30
  from ocrd_validators import OcrdResourceListValidator
27
31
  from ocrd_utils import getLogger, directory_size, get_moduledir, guess_media_type, config
28
32
  from ocrd_utils.os import get_processor_resource_types, list_all_resources, pushd_popd, get_ocrd_tool_json
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocrd
3
- Version: 3.0.0b6
3
+ Version: 3.0.1
4
4
  Summary: OCR-D framework
5
5
  Author-email: Konstantin Baierer <unixprog@gmail.com>
6
6
  License: Apache License 2.0
@@ -17,6 +17,7 @@ Requires-Dist: click>=7
17
17
  Requires-Dist: cryptography<43.0.0
18
18
  Requires-Dist: Deprecated==1.2.0
19
19
  Requires-Dist: docker
20
+ Requires-Dist: elementpath
20
21
  Requires-Dist: fastapi>=0.78.0
21
22
  Requires-Dist: filetype
22
23
  Requires-Dist: Flask