ocrd 3.0.0b7__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ocrd/cli/__init__.py CHANGED
@@ -16,7 +16,7 @@ def command_with_replaced_help(*replacements):
16
16
 
17
17
  class CommandWithReplacedHelp(click.Command):
18
18
  def get_help(self, ctx):
19
- newhelp = super().get_help(ctx)
19
+ newhelp : str = super().get_help(ctx)
20
20
  for replacement in replacements:
21
21
  newhelp = re.sub(*replacement, newhelp)
22
22
  # print(newhelp)
@@ -83,6 +83,8 @@ Variables:
83
83
  \b
84
84
  {config.describe('OCRD_NETWORK_RABBITMQ_CLIENT_CONNECT_ATTEMPTS')}
85
85
  \b
86
+ {config.describe('OCRD_NETWORK_RABBITMQ_HEARTBEAT')}
87
+ \b
86
88
  {config.describe('OCRD_PROFILE_FILE')}
87
89
  \b
88
90
  {config.describe('OCRD_PROFILE', wrap_text=False)}
@@ -48,6 +48,9 @@ def ocrd_cli_wrap_processor(
48
48
  # ocrd_network params end #
49
49
  **kwargs
50
50
  ):
51
+ # init logging handlers so no imported libs can preempt ours
52
+ initLogging()
53
+
51
54
  # FIXME: remove workspace arg entirely
52
55
  processor = processorClass(None)
53
56
  if not sys.argv[1:]:
@@ -89,8 +92,6 @@ def ocrd_cli_wrap_processor(
89
92
  # Used for checking/starting network agents for the WebAPI architecture
90
93
  check_and_run_network_agent(processorClass, subcommand, address, database, queue)
91
94
 
92
- # from here: single-run processing context
93
- initLogging()
94
95
  if 'parameter' in kwargs:
95
96
  # Disambiguate parameter file/literal, and resolve file
96
97
  def resolve(name):
ocrd/mets_server.py CHANGED
@@ -1,8 +1,10 @@
1
1
  """
2
2
  # METS server functionality
3
3
  """
4
+ import os
4
5
  import re
5
6
  from os import _exit, chmod
7
+ import signal
6
8
  from typing import Dict, Optional, Union, List, Tuple
7
9
  from time import sleep
8
10
  from pathlib import Path
@@ -155,13 +157,13 @@ class ClientSideOcrdMets:
155
157
  Request writing the changes to the file system
156
158
  """
157
159
  if not self.multiplexing_mode:
158
- self.session.request("PUT", url=self.url)
160
+ return self.session.request("PUT", url=self.url).text
159
161
  else:
160
- self.session.request(
162
+ return self.session.request(
161
163
  "POST",
162
164
  self.url,
163
165
  json=MpxReq.save(self.ws_dir_path)
164
- )
166
+ ).json()["text"]
165
167
 
166
168
  def stop(self):
167
169
  """
@@ -169,14 +171,13 @@ class ClientSideOcrdMets:
169
171
  """
170
172
  try:
171
173
  if not self.multiplexing_mode:
172
- self.session.request("DELETE", self.url)
173
- return
174
+ return self.session.request("DELETE", self.url).text
174
175
  else:
175
- self.session.request(
176
+ return self.session.request(
176
177
  "POST",
177
178
  self.url,
178
179
  json=MpxReq.stop(self.ws_dir_path)
179
- )
180
+ ).json()["text"]
180
181
  except ConnectionError:
181
182
  # Expected because we exit the process without returning
182
183
  pass
@@ -323,7 +324,7 @@ class ClientSideOcrdMets:
323
324
 
324
325
 
325
326
  class MpxReq:
326
- """This class wrapps the request bodies needed for the tcp forwarding
327
+ """This class wraps the request bodies needed for the tcp forwarding
327
328
 
328
329
  For every mets-server-call like find_files or workspace_path a special request_body is
329
330
  needed to call `MetsServerProxy.forward_tcp_request`. These are created by this functions.
@@ -346,12 +347,12 @@ class MpxReq:
346
347
  @staticmethod
347
348
  def save(ws_dir_path: str) -> Dict:
348
349
  return MpxReq.__args_wrapper(
349
- ws_dir_path, method_type="PUT", response_type="empty", request_url="", request_data={})
350
+ ws_dir_path, method_type="PUT", response_type="text", request_url="", request_data={})
350
351
 
351
352
  @staticmethod
352
353
  def stop(ws_dir_path: str) -> Dict:
353
354
  return MpxReq.__args_wrapper(
354
- ws_dir_path, method_type="DELETE", response_type="empty", request_url="", request_data={})
355
+ ws_dir_path, method_type="DELETE", response_type="text", request_url="", request_data={})
355
356
 
356
357
  @staticmethod
357
358
  def reload(ws_dir_path: str) -> Dict:
@@ -428,18 +429,24 @@ class OcrdMetsServer:
428
429
 
429
430
  @staticmethod
430
431
  def kill_process(mets_server_pid: int):
431
- subprocess_run(args=["kill", "-s", "SIGINT", f"{mets_server_pid}"], shell=False, universal_newlines=True)
432
+ os.kill(mets_server_pid, signal.SIGINT)
433
+ sleep(3)
434
+ try:
435
+ os.kill(mets_server_pid, signal.SIGKILL)
436
+ except ProcessLookupError as e:
437
+ pass
432
438
 
433
439
  def shutdown(self):
440
+ pid = os.getpid()
441
+ self.log.info(f"Shutdown method of mets server[{pid}] invoked, sending SIGTERM signal.")
442
+ os.kill(pid, signal.SIGTERM)
434
443
  if self.is_uds:
435
444
  if Path(self.url).exists():
436
- self.log.debug(f'UDS socket {self.url} still exists, removing it')
445
+ self.log.warning(f"Due to a server shutdown, removing the existing UDS socket file: {self.url}")
437
446
  Path(self.url).unlink()
438
- # os._exit because uvicorn catches SystemExit raised by sys.exit
439
- _exit(0)
440
447
 
441
448
  def startup(self):
442
- self.log.info("Starting up METS server")
449
+ self.log.info(f"Configuring the Mets Server")
443
450
 
444
451
  workspace = self.workspace
445
452
 
@@ -465,32 +472,49 @@ class OcrdMetsServer:
465
472
  """
466
473
  Write current changes to the file system
467
474
  """
468
- return workspace.save_mets()
475
+ workspace.save_mets()
476
+ response = Response(content="The Mets Server is writing changes to disk.", media_type='text/plain')
477
+ self.log.info(f"PUT / -> {response.__dict__}")
478
+ return response
469
479
 
470
480
  @app.delete(path='/')
471
- async def stop():
481
+ def stop():
472
482
  """
473
483
  Stop the mets server
474
484
  """
475
- getLogger('ocrd.models.ocrd_mets').info(f'Shutting down METS Server {self.url}')
476
485
  workspace.save_mets()
486
+ response = Response(content="The Mets Server will shut down soon...", media_type='text/plain')
477
487
  self.shutdown()
488
+ self.log.info(f"DELETE / -> {response.__dict__}")
489
+ return response
478
490
 
479
491
  @app.post(path='/reload')
480
- async def workspace_reload_mets():
492
+ def workspace_reload_mets():
481
493
  """
482
494
  Reload mets file from the file system
483
495
  """
484
496
  workspace.reload_mets()
485
- return Response(content=f'Reloaded from {workspace.directory}', media_type="text/plain")
497
+ response = Response(content=f"Reloaded from {workspace.directory}", media_type='text/plain')
498
+ self.log.info(f"POST /reload -> {response.__dict__}")
499
+ return response
486
500
 
487
501
  @app.get(path='/unique_identifier', response_model=str)
488
502
  async def unique_identifier():
489
- return Response(content=workspace.mets.unique_identifier, media_type='text/plain')
503
+ response = Response(content=workspace.mets.unique_identifier, media_type='text/plain')
504
+ self.log.info(f"GET /unique_identifier -> {response.__dict__}")
505
+ return response
490
506
 
491
507
  @app.get(path='/workspace_path', response_model=str)
492
508
  async def workspace_path():
493
- return Response(content=workspace.directory, media_type="text/plain")
509
+ response = Response(content=workspace.directory, media_type="text/plain")
510
+ self.log.info(f"GET /workspace_path -> {response.__dict__}")
511
+ return response
512
+
513
+ @app.get(path='/physical_pages', response_model=OcrdPageListModel)
514
+ async def physical_pages():
515
+ response = {'physical_pages': workspace.mets.physical_pages}
516
+ self.log.info(f"GET /physical_pages -> {response}")
517
+ return response
494
518
 
495
519
  @app.get(path='/physical_pages', response_model=OcrdPageListModel)
496
520
  async def physical_pages():
@@ -498,18 +522,24 @@ class OcrdMetsServer:
498
522
 
499
523
  @app.get(path='/file_groups', response_model=OcrdFileGroupListModel)
500
524
  async def file_groups():
501
- return {'file_groups': workspace.mets.file_groups}
525
+ response = {'file_groups': workspace.mets.file_groups}
526
+ self.log.info(f"GET /file_groups -> {response}")
527
+ return response
502
528
 
503
529
  @app.get(path='/agent', response_model=OcrdAgentListModel)
504
530
  async def agents():
505
- return OcrdAgentListModel.create(workspace.mets.agents)
531
+ response = OcrdAgentListModel.create(workspace.mets.agents)
532
+ self.log.info(f"GET /agent -> {response.__dict__}")
533
+ return response
506
534
 
507
535
  @app.post(path='/agent', response_model=OcrdAgentModel)
508
536
  async def add_agent(agent: OcrdAgentModel):
509
537
  kwargs = agent.dict()
510
538
  kwargs['_type'] = kwargs.pop('type')
511
539
  workspace.mets.add_agent(**kwargs)
512
- return agent
540
+ response = agent
541
+ self.log.info(f"POST /agent -> {response.__dict__}")
542
+ return response
513
543
 
514
544
  @app.get(path="/file", response_model=OcrdFileListModel)
515
545
  async def find_files(
@@ -526,7 +556,9 @@ class OcrdMetsServer:
526
556
  found = workspace.mets.find_all_files(
527
557
  fileGrp=file_grp, ID=file_id, pageId=page_id, mimetype=mimetype, local_filename=local_filename, url=url
528
558
  )
529
- return OcrdFileListModel.create(found)
559
+ response = OcrdFileListModel.create(found)
560
+ self.log.info(f"GET /file -> {response.__dict__}")
561
+ return response
530
562
 
531
563
  @app.post(path='/file', response_model=OcrdFileModel)
532
564
  async def add_file(
@@ -549,7 +581,9 @@ class OcrdMetsServer:
549
581
  # Add to workspace
550
582
  kwargs = file_resource.dict()
551
583
  workspace.add_file(**kwargs, force=force)
552
- return file_resource
584
+ response = file_resource
585
+ self.log.info(f"POST /file -> {response.__dict__}")
586
+ return response
553
587
 
554
588
  # ------------- #
555
589
 
@@ -557,9 +591,6 @@ class OcrdMetsServer:
557
591
  # Create socket and change to world-readable and -writable to avoid permission errors
558
592
  self.log.debug(f"chmod 0o677 {self.url}")
559
593
  server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
560
- if Path(self.url).exists() and not is_socket_in_use(self.url):
561
- # remove leftover unused socket which blocks startup
562
- Path(self.url).unlink()
563
594
  server.bind(self.url) # creates the socket file
564
595
  atexit.register(self.shutdown)
565
596
  server.close()
@@ -571,16 +602,5 @@ class OcrdMetsServer:
571
602
  uvicorn_kwargs['log_config'] = None
572
603
  uvicorn_kwargs['access_log'] = False
573
604
 
574
- self.log.debug("Starting uvicorn")
605
+ self.log.info("Starting the uvicorn Mets Server")
575
606
  uvicorn.run(app, **uvicorn_kwargs)
576
-
577
-
578
- def is_socket_in_use(socket_path):
579
- if Path(socket_path).exists():
580
- client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
581
- try:
582
- client.connect(socket_path)
583
- except OSError:
584
- return False
585
- client.close()
586
- return True
ocrd/processor/base.py CHANGED
@@ -130,7 +130,8 @@ class DummyExecutor:
130
130
  def __init__(self, initializer=None, initargs=(), **kwargs):
131
131
  initializer(*initargs)
132
132
  def shutdown(self, **kwargs):
133
- pass
133
+ # allow gc to catch processor instance (unless cached)
134
+ _page_worker_set_ctxt(None, None)
134
135
  def submit(self, fn, *args, **kwargs) -> DummyFuture:
135
136
  return DummyFuture(fn, *args, **kwargs)
136
137
 
@@ -372,7 +373,7 @@ class Processor():
372
373
  deprecated(version='3.0', reason='process() should be replaced with process_page_pcgts() or process_page_file() or process_workspace()')(getattr(self, 'process')))
373
374
 
374
375
  def __del__(self):
375
- self._base_logger.debug("shutting down")
376
+ self._base_logger.debug("shutting down %s in %s", repr(self), mp.current_process().name)
376
377
  self.shutdown()
377
378
 
378
379
  def show_help(self, subcommand=None):
@@ -505,7 +506,7 @@ class Processor():
505
506
  # set up multitasking
506
507
  max_workers = max(0, config.OCRD_MAX_PARALLEL_PAGES)
507
508
  if self.max_workers > 0 and self.max_workers < config.OCRD_MAX_PARALLEL_PAGES:
508
- self._base_logger.info("limiting number of threads from %d to %d", max_workers, self.max_workers)
509
+ self._base_logger.info("limiting number of workers from %d to %d", max_workers, self.max_workers)
509
510
  max_workers = self.max_workers
510
511
  if max_workers > 1:
511
512
  assert isinstance(workspace.mets, ClientSideOcrdMets), \
@@ -518,12 +519,9 @@ class Processor():
518
519
  if max_workers > 1:
519
520
  executor_cls = ProcessPoolExecutor
520
521
  log_queue = mp.Queue()
521
- # forward messages from log queue (in subprocesses) to all root handlers
522
- log_listener = logging.handlers.QueueListener(log_queue, *logging.root.handlers, respect_handler_level=True)
523
522
  else:
524
523
  executor_cls = DummyExecutor
525
524
  log_queue = None
526
- log_listener = None
527
525
  executor = executor_cls(
528
526
  max_workers=max_workers or 1,
529
527
  # only forking method avoids pickling
@@ -533,6 +531,8 @@ class Processor():
533
531
  initargs=(self, log_queue),
534
532
  )
535
533
  if max_workers > 1:
534
+ # forward messages from log queue (in subprocesses) to all root handlers
535
+ log_listener = logging.handlers.QueueListener(log_queue, *logging.root.handlers, respect_handler_level=True)
536
536
  log_listener.start()
537
537
  try:
538
538
  self._base_logger.debug("started executor %s with %d workers", str(executor), max_workers or 1)
@@ -542,6 +542,7 @@ class Processor():
542
542
  executor.shutdown(kill_workers=True, wait=False)
543
543
  if max_workers > 1:
544
544
  log_listener.stop()
545
+ del log_listener
545
546
 
546
547
  except NotImplementedError:
547
548
  # fall back to deprecated method
@@ -16,6 +16,26 @@
16
16
  "description": "Whether to actually copy files (true) or just create PAGE-XML as a side effect (false)"
17
17
  }
18
18
  }
19
+ },
20
+ "ocrd-filter": {
21
+ "executable": "ocrd-filter",
22
+ "description": "Bare-bones processor can be dynamically configured to remove segments based on XPath queries",
23
+ "steps": ["recognition/post-correction"],
24
+ "categories": ["Quality assurance"],
25
+ "input_file_grp_cardinality": 1,
26
+ "output_file_grp_cardinality": 1,
27
+ "parameters": {
28
+ "select": {
29
+ "type": "string",
30
+ "default": "//*[ends-with(local-name(),'Region')]",
31
+ "description": "Which segments to select for removal. An XPath 2.0 query expression (path and optional predicates), with 'pc' as namespace prefix for PAGE-XML and our extension functions (see help text). Only selection of segment hierarchy elements is allowed (so e.g. `*` would be equivalent to `pc:NoiseRegion|pc:LineDrawingRegion|pc:AdvertRegion|pc:ImageRegion|pc:ChartRegion|pc:MusicRegion|pc:GraphicRegion|pc:UnknownRegion|pc:CustomRegion|pc:SeparatorRegion|pc:MathsRegion|pc:TextRegion|pc:MapRegion|pc:ChemRegion|pc:TableRegion|pc:TextLine|pc:Word|pc:Glyph`, but `pc:MetadataItem` or `pc:Border` or `pc:Coords` would not match).\nFor example, to remove words or glyphs with low text confidence, select '(pc:Word|pc:Glyph)[pc:TextEquiv/@conf < 0.7]'. Or low layout confidence, '*[pc:Coords/@conf < 0.7]'.\nTo remove high pixel-to-character rate, select '*[pc:pixelarea(.) div string-length(pc:textequiv(.)) > 10000]'."
32
+ },
33
+ "plot": {
34
+ "type": "boolean",
35
+ "default": false,
36
+ "description": "Whether to extract an image for each filtered segment and write to the output fileGrp."
37
+ }
38
+ }
19
39
  }
20
40
  }
21
41
  }
@@ -13,9 +13,6 @@ from ocrd_utils import (
13
13
  make_file_id,
14
14
  MIME_TO_EXT,
15
15
  MIMETYPE_PAGE,
16
- parse_json_string_with_comments,
17
- resource_string,
18
- config
19
16
  )
20
17
  from ocrd_modelfactory import page_from_file
21
18
 
@@ -0,0 +1,108 @@
1
+ # pylint: disable=missing-module-docstring,invalid-name
2
+ from typing import Optional
3
+
4
+ from lxml import etree
5
+ import click
6
+
7
+ from ocrd import Processor, OcrdPageResult, OcrdPageResultImage
8
+ from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
9
+ from ocrd_models import OcrdPage
10
+
11
+ _SEGTYPES = [
12
+ "NoiseRegion",
13
+ "LineDrawingRegion",
14
+ "AdvertRegion",
15
+ "ImageRegion",
16
+ "ChartRegion",
17
+ "MusicRegion",
18
+ "GraphicRegion",
19
+ "UnknownRegion",
20
+ "CustomRegion",
21
+ "SeparatorRegion",
22
+ "MathsRegion",
23
+ "TextRegion",
24
+ "MapRegion",
25
+ "ChemRegion",
26
+ "TableRegion",
27
+ "TextLine",
28
+ "Word",
29
+ "Glyph"
30
+ ]
31
+
32
class FilterProcessor(Processor):
    # Processor behind the ``ocrd-filter`` executable: removes the PAGE
    # segments selected by the ``select`` parameter and optionally exports an
    # image per removed segment (``plot`` parameter).
    #
    # NOTE(review): documented with comments rather than a class docstring,
    # and the method docstring below is kept verbatim, because the tool
    # description refers users to the "help text" and the docstring carries a
    # click-style ``\b`` marker -- presumably it is shown to users; confirm
    # before rewording it.

    def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult:
        """
        Remove PAGE segment hierarchy elements based on flexible selection criteria.

        Open and deserialise PAGE input file, then iterate over the segment hierarchy
        down to the level required for ``select`` (which could be multiple levels at once).

        Remove any segments matching XPath query ``select`` from that hierarchy (and from
        the `ReadingOrder` if it is a region type).

        \b
        Besides full XPath 2.0 syntax, this supports extra predicates:
        - `pc:pixelarea()` for the number of pixels of the bounding box (or sum area on node sets),
        - `pc:textequiv()` for the first TextEquiv unicode string (or concatenated string on node sets).

        If ``plot`` is `true`, then extract and write an image file for all removed segments
        to the output fileGrp (without reference to the PAGE).

        Produce a new PAGE output file by serialising the resulting hierarchy.
        """
        pcgts = input_pcgts[0]
        result = OcrdPageResult(pcgts)
        matches = pcgts.xpath(self.parameter['select'])
        # Translate the matched etree nodes back into PAGE objects and keep
        # only those which are part of the segment hierarchy. Nodes without
        # a reverse mapping yield None, which is filtered out by the same
        # class-name test.
        to_page_object = pcgts.revmap.get
        segments = []
        for node in matches:
            candidate = to_page_object(node)
            if candidate.__class__.__name__.replace('Type', '') in _SEGTYPES:
                segments.append(candidate)
        if not segments:
            self.logger.info("no matches")
            return result
        rodict = pcgts.get_Page().get_ReadingOrderGroups()
        plot = self.parameter['plot']
        if plot:
            # the page image is only needed for cropping the removed segments
            page_image, page_coords, _ = self.workspace.image_from_page(pcgts.get_Page(), page_id)
        for segment in segments:
            segtype = segment.original_tagname_
            self.logger.info("matched %s segment %s", segtype, segment.id)
            parent = segment.parent_object_
            partype = parent.__class__.__name__.replace('Type', '')
            # Find the parent's child list which currently holds the segment.
            if partype == 'Page':
                siblings = getattr(parent, 'get_' + segtype)()
            elif partype.endswith('Region'):
                # a region contains either sub-regions or text lines
                siblings = (getattr(parent, 'get_' + segtype)()
                            if segtype.endswith('Region')
                            else parent.TextLine)
            elif partype == 'TextLine':
                siblings = parent.Word
            elif partype == 'Word':
                siblings = parent.Glyph
            else:
                raise Exception(f"unexpected type ({partype}) of parent for matched segment ({segtype})")
            # detach in both directions: parent -> child and child -> parent
            siblings.remove(segment)
            segment.parent_object_ = None
            if segtype.endswith('Region') and segment.id in rodict:
                # the region is referenced in the ReadingOrder: drop that
                # reference from its own parent group as well
                ro_ref = rodict[segment.id]
                ro_attr = ro_ref.__class__.__name__.replace('Type', '')
                getattr(ro_ref.parent_object_, ro_attr).remove(ro_ref)
                ro_ref.parent_object_ = None
                del rodict[segment.id]
            if plot:
                segment_image, _ = self.workspace.image_from_segment(segment, page_image, page_coords)
                result.images.append(OcrdPageResultImage(segment_image, segment.id + '.IMG', None))
        return result

    @property
    def metadata_filename(self):
        # NOTE(review): points at the dummy processor's tool description --
        # presumably because the ``ocrd-filter`` entry is kept in that same
        # file; confirm.
        return 'processor/builtin/dummy/ocrd-tool.json'

    @property
    def executable(self):
        # name of the executable this processor is published as
        return 'ocrd-filter'
104
+
105
@click.command()
@ocrd_cli_options
def cli(*args, **kwargs):
    # Command-line entry point: forwards all arguments and options to the
    # generic OCR-D processor wrapper, bound to FilterProcessor.
    # A comment is used instead of a docstring on purpose: click would pick
    # up a docstring as the command's help text.
    outcome = ocrd_cli_wrap_processor(FilterProcessor, *args, **kwargs)
    return outcome
ocrd/resource_manager.py CHANGED
@@ -23,6 +23,10 @@ yaml.constructor.SafeConstructor.yaml_constructors['tag:yaml.org,2002:timestamp'
23
23
 
24
24
  # pylint: enable=wrong-import-position
25
25
 
26
+ # pylint: enable=wrong-import-position
27
+
28
+ # pylint: enable=wrong-import-position
29
+
26
30
  from ocrd_validators import OcrdResourceListValidator
27
31
  from ocrd_utils import getLogger, directory_size, get_moduledir, guess_media_type, config
28
32
  from ocrd_utils.os import get_processor_resource_types, list_all_resources, pushd_popd, get_ocrd_tool_json
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocrd
3
- Version: 3.0.0b7
3
+ Version: 3.0.1
4
4
  Summary: OCR-D framework
5
5
  Author-email: Konstantin Baierer <unixprog@gmail.com>
6
6
  License: Apache License 2.0
@@ -17,6 +17,7 @@ Requires-Dist: click>=7
17
17
  Requires-Dist: cryptography<43.0.0
18
18
  Requires-Dist: Deprecated==1.2.0
19
19
  Requires-Dist: docker
20
+ Requires-Dist: elementpath
20
21
  Requires-Dist: fastapi>=0.78.0
21
22
  Requires-Dist: filetype
22
23
  Requires-Dist: Flask
@@ -1,16 +1,16 @@
1
1
  ocrd/__init__.py,sha256=ZswMVmlqFhAEIzMR3my6IKPq9XLH21aDPC_m_8Jh4dA,1076
2
2
  ocrd/constants.py,sha256=6dn3mG54WqHsKInmLZp4kJjNqqPtBoFoSuLUuRbOps0,740
3
3
  ocrd/lib.bash,sha256=le6XqAOEacdjP3JNSlPkxwRH1y0oVjNQM2tX5d6QFO4,10901
4
- ocrd/mets_server.py,sha256=U62eih1_O_N0StunVFkEustFs2PlrcMzccraj6_QRk4,21295
4
+ ocrd/mets_server.py,sha256=wiOvSSYZMicjQC958dD2i7uHMwfZCaDZQd7E5l50ayU,22436
5
5
  ocrd/ocrd-all-tool.json,sha256=9bX2VYnUwhTAzAvKaoT77BFzbgBGgyIt7qBqARpwWNc,586
6
6
  ocrd/resolver.py,sha256=Ba9ALQbTXz6_mla4VqN9tAfHoj6aKuNJAU4tIDnjcHE,14952
7
7
  ocrd/resource_list.yml,sha256=82-PiqkZnka1kTj3MQqNn4wXWKHHtoFchsQuetWuqFs,2633
8
- ocrd/resource_manager.py,sha256=8BMVKJq8J56hugi8vtGn9Ffuk7oRkbs197aG74aKbCY,16733
8
+ ocrd/resource_manager.py,sha256=kIWDoKxWH4IJE1gcoTcCRQjYjieCqiQclyuyF6Y9b8A,16813
9
9
  ocrd/task_sequence.py,sha256=spiaUQaMM7M8WdBDoQGmLuTPm7tOugYXD6rcJ2UXzxw,6991
10
10
  ocrd/workspace.py,sha256=cedqK7es2i2nwQCiUiVyWk3j4-nH7bsi6TF7v8siTio,65794
11
11
  ocrd/workspace_backup.py,sha256=iab_JjZ_mMP-G8NIUk4PZmfpNlQuGRoqc3NbTSSew1w,3621
12
12
  ocrd/workspace_bagger.py,sha256=yU8H3xR5WmQKvgQewac71ie-DUWcfLnMS01D55zsEHQ,11971
13
- ocrd/cli/__init__.py,sha256=lNR6wMf7JhQ8Jf33tUkowJr0mB3423OMY0_6dkMRLvU,2672
13
+ ocrd/cli/__init__.py,sha256=-BiwIakeCkWx0Jd2yX9_ahfdV4VKz_5yqGEJ_2zKakQ,2734
14
14
  ocrd/cli/bashlib.py,sha256=ypFBM3-IULz_IEBx0Y04eGt9VbQWwEWm4ujm9g_hPWY,6009
15
15
  ocrd/cli/log.py,sha256=6_FrVmTKIIVNUaNLkuOJx8pvPhensHMuayJ0PA7T-XA,1562
16
16
  ocrd/cli/network.py,sha256=oWBHFEURxfUdb_t-F4svP_ri7o5mqBoNQnLZLbsZLTA,602
@@ -20,50 +20,51 @@ ocrd/cli/resmgr.py,sha256=bTE-MpF7RRCHhgAbknqZUFHgHScIK6FR3S4h4DEAets,10080
20
20
  ocrd/cli/validate.py,sha256=nvageDaHCETcE71X5lu7i_4JKpgo9MrvJKinVPLYUTI,5727
21
21
  ocrd/cli/workspace.py,sha256=KTbSzIUrba5WoYETvM9ElRZVsDUHCGVvjoFgBGZS2nU,40468
22
22
  ocrd/cli/zip.py,sha256=MMJLw3OXWiJVfVtrdJcBkbB8vA1IzSautluazZRuCQ0,5910
23
- ocrd/decorators/__init__.py,sha256=NWqZUTSBve6yIBgTN3miGmz4w1IFefLhG92qXx5ADtw,7620
23
+ ocrd/decorators/__init__.py,sha256=PyXX7vxdWkRHixas9dWUtyO3YLczcly8ZEpfZDSMVp8,7639
24
24
  ocrd/decorators/loglevel_option.py,sha256=tgipROEu3t4hkwWvFssd80k2SbTBwBIC4WNE6Gc-XAg,798
25
25
  ocrd/decorators/mets_find_options.py,sha256=d4oATKMP6bFQHNqOK6nLqgUiWF2FYdkPvzkTVRMYpKo,635
26
26
  ocrd/decorators/ocrd_cli_options.py,sha256=hr2EugwAY_-GJ7F7g77Od9o9eAqhfLBHSpfmCql2OCU,2665
27
27
  ocrd/decorators/parameter_option.py,sha256=n8hYw7XVTd3i3tvpK8F1Jx_CqRp6EGF9qJVH95yj92Q,1076
28
28
  ocrd/processor/__init__.py,sha256=39ymNwYRdc-b_OJzzKmWCvo2ga3KdsGSYDHE1Hzkn_w,274
29
- ocrd/processor/base.py,sha256=8kFrYLd1cSHBaBolHjXdIVHwmV8muKgBCBrAYw7kWTQ,59154
30
- ocrd/processor/concurrent.py,sha256=IMMBFGDGqfpgm7Rp6J_dnXVckyBKntwARwcKDnxadHQ,38217
29
+ ocrd/processor/base.py,sha256=rVTQeUpZR_rBDh44Q7Xtl5TGcEdyBhMTDfpAgx4eLPg,59295
31
30
  ocrd/processor/helpers.py,sha256=8ngrqAJ01BSoSJNsIoK_YfA8QdryM5y0MqaqA9f7ELM,10483
32
31
  ocrd/processor/ocrd_page_result.py,sha256=eDkpyVHcpaBzTHXiGrcNk9PP9Xr-XZru2w_uoX_ZeNA,510
33
32
  ocrd/processor/builtin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
- ocrd/processor/builtin/dummy_processor.py,sha256=iWiw_jJXOqwr7-hFjdkmTCCo1xGr6MLGOshx81PTu-8,3548
33
+ ocrd/processor/builtin/dummy_processor.py,sha256=a-4kKJ1JeXQuBIyyN8w2R3s7ov-wAfyEdEz3nxrf0sU,3479
34
+ ocrd/processor/builtin/filter_processor.py,sha256=nDnXjo2tDCjodURb8VlB4VJtRwWGx261jH7AG91edSk,4317
35
35
  ocrd/processor/builtin/dummy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
- ocrd/processor/builtin/dummy/ocrd-tool.json,sha256=aTA2FZRsRsrkbTctkazFeRu4xsTF6yCdeY07cMzOyt4,677
37
- ocrd_modelfactory/__init__.py,sha256=0baYSJXrOCTCguHkE6hBeqpGNVUe3aZUocv64A-DMDk,4094
36
+ ocrd/processor/builtin/dummy/ocrd-tool.json,sha256=VoI37paWiUyMkTN5Qqau8R1Clmw24-HcZu4wjy1Br9Y,2311
37
+ ocrd_modelfactory/__init__.py,sha256=NyJT1uSvmeEwibRFOkh0AEoVnYfP0mzxU--pP23B-TQ,4404
38
38
  ocrd_models/__init__.py,sha256=A0aj0mOraNb-xfiUueACdoaqISnp0qH-F49nTJg2vCs,380
39
39
  ocrd_models/constants.py,sha256=fI6Qz4OPOm6UBLQ_P2dlpjcwB0XFJZ7AgxxKqgc75X0,2724
40
40
  ocrd_models/mets-empty.xml,sha256=dFixfbxSXrgjZx9BfdIKWHX-khNmp7dNYaFe2qQSwCY,1203
41
41
  ocrd_models/ocrd_agent.py,sha256=E9OtDhz9UfKb6ou2qvsuCL9NlO1V6zMb0s8nVq8dVos,5609
42
- ocrd_models/ocrd_exif.py,sha256=5BRLjvB6jg36V68i8jvVnT2SSNnpqLbhLsaMuP51Scw,4583
42
+ ocrd_models/ocrd_exif.py,sha256=wRSprHxCy9LCXw41Fi9kp-CbFc5NFX9ZFIFNszB41qk,4585
43
43
  ocrd_models/ocrd_file.py,sha256=7lyHezuNnl2FEYV1lV35-QTCrgYAL-3wO2ulFUNq2Ak,9717
44
44
  ocrd_models/ocrd_mets.py,sha256=cRBEnvRNmRUhcrKouqygTQqYA-XdgyA-FNLwt86V0vg,42878
45
- ocrd_models/ocrd_page.py,sha256=sVIvvMeBT8eZnOfW0DTjQUNyu62-llz0v_Ga5Xo-tUM,5393
46
- ocrd_models/ocrd_page_generateds.py,sha256=wfx3vESMAi08rl6-16zNVJe4E3B6APIvL6RCr1roAzg,774092
45
+ ocrd_models/ocrd_page.py,sha256=TTCnvpKGyZx1dqH8LnDiVVVPjU6emWGVLO_4o9rQHtw,6233
46
+ ocrd_models/ocrd_page_generateds.py,sha256=vjXPob5UMxYxPI6un8SYTL673Nhph0E2WEZexBgbvY4,841794
47
47
  ocrd_models/ocrd_xml_base.py,sha256=OW57mXLlwm1nH8CNefvXmwLRws9KL9zSrb-3vH--mX8,1641
48
48
  ocrd_models/report.py,sha256=luZxvzAAQyGYOlRNSJQUIUIANG81iGmBW5ag-uXxKCA,2026
49
49
  ocrd_models/utils.py,sha256=0_WHf5NEn1WC8MKJc6X_RK8gW-70Z09_mslkKOj7uF8,2369
50
+ ocrd_models/xpath_functions.py,sha256=AwR8tHf56-mmIksnw_GeOQ760sxNHqK92T7z9OfsEEs,1437
50
51
  ocrd_network/__init__.py,sha256=gMejC614J5PPGgXDKBiQS0jt-Jx8qOrLbWH7zt8x8Gs,374
51
- ocrd_network/client.py,sha256=j9PJ9QHI6qJ_qAxVlpTk5mLfq0Xemj_Br-0SX-09U5M,2834
52
- ocrd_network/client_utils.py,sha256=JDBzDOGY2G_bgS8fQ677CMHG9DWSvIuWTlEFzdsXr_Q,4527
52
+ ocrd_network/client.py,sha256=rzqtamZ8krRRy-QTO-AeWH8Lr3HhRiQe2R1-Lovd40g,3020
53
+ ocrd_network/client_utils.py,sha256=VVZMNBgGznh41exZ78S48X3DDwHcWTuOq-LNdxjRvak,5002
53
54
  ocrd_network/constants.py,sha256=IeNtcU6dqonDE3Zw83_61auhS8X2b8wsjAxYg1zvK-M,1902
54
55
  ocrd_network/database.py,sha256=fcft7vdRDoR7vmPL1xNYTIeOg5DwRPcggwYDYxLy5ik,10706
55
56
  ocrd_network/logging_utils.py,sha256=bO9TQqBXw9CIZEKp8tHXbeE2NuJWMiaQDHzS05b4ajo,2153
56
57
  ocrd_network/param_validators.py,sha256=Jl1VwiPPKJ50k-xEHLdvW-1QDOkJHCiMz4k9Ipqm-Uc,1489
57
58
  ocrd_network/process_helpers.py,sha256=KpkInXsa5bgrxvTOouyMJ0NgJhaz0J9Gjs5sZHBcH64,2373
58
- ocrd_network/processing_server.py,sha256=8EFQIcjiQ-AEc4goaggxa-hYWhJW9fMzR768ZIX8vZE,41139
59
- ocrd_network/processing_worker.py,sha256=VV_pC5ljtjPpMY89MElAMZ-YUJdKk7IPugxFfDy8viA,12396
60
- ocrd_network/processor_server.py,sha256=RVX0MSzzayDdqHg7Kga7L4vOsjmQcoRYObh6Naxz-U4,9858
61
- ocrd_network/server_cache.py,sha256=GKX91SkVahgAk0nGWT0S-0qWp7a9ubxncjwwLTKs7JY,12829
62
- ocrd_network/server_utils.py,sha256=NBSZvLD7o43fMtSkqR70ougW-IWCW9J71GHqZIBeD0Q,10982
63
- ocrd_network/tcp_to_uds_mets_proxy.py,sha256=pNEl5jH09M-zuUIx25nog7IZ-fkXabi3724kQ_3iMTs,2894
64
- ocrd_network/utils.py,sha256=Vm8EiBBSKv5fAlkcb6nB8-duFN633ZIRAcPKfk7iyZY,6366
59
+ ocrd_network/processing_server.py,sha256=qBiYk4wgTLqhHvbmDWu_F626BfSfyvkoCD-i0ZwsBSE,42109
60
+ ocrd_network/processing_worker.py,sha256=fhIvmDQAYOkHYtUs5IB8Jk2lOKUTIBk3DskAsFloijA,12591
61
+ ocrd_network/processor_server.py,sha256=2CD9TlinXk6x1jFjP5VWOXgJe8lAQdxc9zjZuVy3EOw,9931
62
+ ocrd_network/server_cache.py,sha256=LpvJ-_Lbaeo4M3t8rZDdm9DAErZr8lDlma6pYc0m7aQ,13149
63
+ ocrd_network/server_utils.py,sha256=Uge5F2VagPAEpcyU_Qf8AiecObIGXE0ilD8DaK7bTdE,12222
64
+ ocrd_network/tcp_to_uds_mets_proxy.py,sha256=yRW-O6ihd31gf7xqQBIBb_ZQQgqisMyOdRI216ehq_A,3160
65
+ ocrd_network/utils.py,sha256=XzPXeSPCVjWLQM540PCpxfJ5hqjJ85_OQBjnf9HlDtE,6759
65
66
  ocrd_network/cli/__init__.py,sha256=F7YVqxw-9glz6-ghG0Kp5XXeV1-rL1emVSXLCWxdTF0,306
66
- ocrd_network/cli/client.py,sha256=4bpsDRwm3WVFHA6ITM-VKAFx-JQ6ldbBKFVYleWhiW8,7515
67
+ ocrd_network/cli/client.py,sha256=XYWbeSiPK4BQXuyTq_FTOXEKljXVLkukWfx07aKbthY,8424
67
68
  ocrd_network/cli/processing_server.py,sha256=rAci6RsHlZ0c87GuLdfdCQCiGNcDEu4NEEQiwKJqVUo,796
68
69
  ocrd_network/cli/processing_worker.py,sha256=ZuaCkbKV_WKJV7cGOjZ6RLrjjppymnwNCiznFMlclAg,1897
69
70
  ocrd_network/cli/processor_server.py,sha256=Vto7UekFo_g83aHqwDmhge9bhPzk0b7O-L46dSfIpJc,1259
@@ -74,7 +75,7 @@ ocrd_network/models/ocrd_tool.py,sha256=WhxSwDyEXtF03Cu8u2tLZcYM0tCacL4PX1GveAxn
74
75
  ocrd_network/models/workflow.py,sha256=GL8q7RX9fGdXG3iVyJpCeLXbWa-2qI_SIxqhzxs9VK8,189
75
76
  ocrd_network/models/workspace.py,sha256=42G8RoS0wJ902LHyM09NIaqHoRdmP-Oib3d5I07sTBQ,1579
76
77
  ocrd_network/rabbitmq_utils/__init__.py,sha256=8MRawAiSpZ9IQUBcLAS7sYOzNC9sI11eZOLk7k92_ZQ,691
77
- ocrd_network/rabbitmq_utils/connector.py,sha256=tLYayGlGRrqLkQChhL5FyUyF0MQTi4w-nWKxz_IuX-0,11342
78
+ ocrd_network/rabbitmq_utils/connector.py,sha256=N6mzjIf5FkVIno3FI1AksZY4F5jMUAm8baay0nXZx8w,11343
78
79
  ocrd_network/rabbitmq_utils/constants.py,sha256=Zu_dKJASfrgnIvEZZlFX9uDR9y6w7zy0KhW7gP7wHDE,1063
79
80
  ocrd_network/rabbitmq_utils/consumer.py,sha256=3WeryDmo0dSD9U0eLODbDElscvhEYjNeCBIewQHYfws,2488
80
81
  ocrd_network/rabbitmq_utils/helpers.py,sha256=y8FTC1ml_IBNcFo14GgCNtNRxYDotQn7U14HmTkv6h0,4874
@@ -83,12 +84,12 @@ ocrd_network/rabbitmq_utils/publisher.py,sha256=mw4XQQhRE1xUQVgEUseyG845iIgVO-9G
83
84
  ocrd_network/runtime_data/__init__.py,sha256=3jYkmT4mxMUcpbDaSw7Ld0KTedGEx_5vUQPDjwUyJZc,367
84
85
  ocrd_network/runtime_data/config_parser.py,sha256=Vr0FbsqmsoiuhDgZ7KFdeFZj9JvUulcOS2PCRFQQNHY,2364
85
86
  ocrd_network/runtime_data/connection_clients.py,sha256=DZyAvkNyMaIddGJs56s2pMP_fK-XWAtICxk1cjvkWYM,4207
86
- ocrd_network/runtime_data/deployer.py,sha256=W7SHn6XBC5fIaTJIMnHOMxEyp7dVqvHY4_cepaloMhQ,8315
87
+ ocrd_network/runtime_data/deployer.py,sha256=LkDUG0uJf_V4SteiOM3EWwhKtdANCjmAOEAJJDshN30,9111
87
88
  ocrd_network/runtime_data/hosts.py,sha256=ml19ptzH4TFofyJR-Qp_Mn3sZUFbWoNe__rRXZSj_WE,12185
88
89
  ocrd_network/runtime_data/network_agents.py,sha256=5p_zKLqECBIHLw-Ya6eKcKSZcUM4ESiipEIphVxHBEA,5192
89
90
  ocrd_network/runtime_data/network_services.py,sha256=xrPpFUU_Pa-XzGe2FEt5RmO17xqykIUmTr_9g6S7XSs,7892
90
91
  ocrd_utils/__init__.py,sha256=U_zAQJwxg_aJ4CR84CKMNAUP6Cob8Er8Ikj42JmnUKo,5977
91
- ocrd_utils/config.py,sha256=Rkqv5wWEmlDDD0l1IWo9TPgn5ppPnHPRH9FfkMST29E,11117
92
+ ocrd_utils/config.py,sha256=smjUAGK5n0iKZCs4dZAtrZurelcaavlFqhIUJWNMOi0,11796
92
93
  ocrd_utils/constants.py,sha256=ImbG1d8t2MW3uuFi-mN6aY90Zn74liAKZBKlfuKN86w,3278
93
94
  ocrd_utils/deprecate.py,sha256=4i50sZsA3Eevqn5D-SL5yGf9KEZfGCV4A5Anzn1GRMs,1026
94
95
  ocrd_utils/image.py,sha256=zNNX1cnRy6yvrxx8mnYQiqWraAh5-i4a1AOfCCg4SmI,24781
@@ -119,9 +120,9 @@ ocrd_validators/xlink.xsd,sha256=8fW7YAMWXN2PbB_MMvj9H5ZeFoEBDzuYBtlGC8_6ijw,318
119
120
  ocrd_validators/xsd_mets_validator.py,sha256=4GWfLyqkmca0x7osDuXuExYuM0HWVrKoqn0S35sFhHU,467
120
121
  ocrd_validators/xsd_page_validator.py,sha256=BNz_9u-Ek4UCeyZu3KxSQoolfW9lvuaSR9nIu1XXxeE,467
121
122
  ocrd_validators/xsd_validator.py,sha256=6HrVAf6SzCvfUIuQdIzz9bOq4V-zhyii9yrUPoK2Uvo,2094
122
- ocrd-3.0.0b7.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
123
- ocrd-3.0.0b7.dist-info/METADATA,sha256=lc6oZVb9SPl97qvspPigal4yb_1DBBRZwE3GQUTAK_o,10417
124
- ocrd-3.0.0b7.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
125
- ocrd-3.0.0b7.dist-info/entry_points.txt,sha256=tV_gAdO8cbnOjS0GmKfJKbN60xBAV2DQRX6hEjleSjE,94
126
- ocrd-3.0.0b7.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
127
- ocrd-3.0.0b7.dist-info/RECORD,,
123
+ ocrd-3.0.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
124
+ ocrd-3.0.1.dist-info/METADATA,sha256=y6mdBSjKmkTMSJ7F1LAvjCWeagTBRAeiYaka9_Z2Djc,10442
125
+ ocrd-3.0.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
126
+ ocrd-3.0.1.dist-info/entry_points.txt,sha256=4hcJ2LkK_OlIabHnKgFit35Ap7b5Lz1Gb4hzkxV0Kiw,152
127
+ ocrd-3.0.1.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
128
+ ocrd-3.0.1.dist-info/RECORD,,
@@ -1,3 +1,4 @@
1
1
  [console_scripts]
2
2
  ocrd = ocrd.cli:cli
3
3
  ocrd-dummy = ocrd.processor.builtin.dummy_processor:cli
4
+ ocrd-filter = ocrd.processor.builtin.filter_processor:cli