ocrd 3.0.2__tar.gz → 3.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ocrd-3.0.2/src/ocrd.egg-info → ocrd-3.0.4}/PKG-INFO +2 -2
- ocrd-3.0.4/VERSION +1 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/mets_server.py +12 -12
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/processor/base.py +9 -3
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/processor/helpers.py +11 -5
- {ocrd-3.0.2 → ocrd-3.0.4/src/ocrd.egg-info}/PKG-INFO +2 -2
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_models/ocrd_page_generateds.py +3 -3
- {ocrd-3.0.2 → ocrd-3.0.4}/tests/test_mets_server.py +1 -1
- {ocrd-3.0.2 → ocrd-3.0.4}/tests/test_workspace.py +4 -4
- ocrd-3.0.2/VERSION +0 -1
- {ocrd-3.0.2 → ocrd-3.0.4}/LICENSE +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/MANIFEST.in +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/README.md +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/README_bashlib.md +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/README_ocrd.md +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/README_ocrd_modelfactory.md +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/README_ocrd_models.md +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/README_ocrd_network.md +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/README_ocrd_utils.md +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/README_ocrd_validators.md +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/pyproject.toml +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/requirements.txt +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/setup.cfg +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/__init__.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/cli/__init__.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/cli/bashlib.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/cli/log.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/cli/network.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/cli/ocrd_tool.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/cli/process.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/cli/resmgr.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/cli/validate.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/cli/workspace.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/cli/zip.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/constants.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/decorators/__init__.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/decorators/loglevel_option.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/decorators/mets_find_options.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/decorators/ocrd_cli_options.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/decorators/parameter_option.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/lib.bash +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/ocrd-all-tool.json +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/processor/__init__.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/processor/builtin/__init__.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/processor/builtin/dummy/__init__.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/processor/builtin/dummy/ocrd-tool.json +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/processor/builtin/dummy_processor.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/processor/builtin/filter_processor.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/processor/ocrd_page_result.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/resolver.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/resource_list.yml +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/resource_manager.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/task_sequence.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/workspace.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/workspace_backup.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd/workspace_bagger.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd.egg-info/SOURCES.txt +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd.egg-info/dependency_links.txt +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd.egg-info/entry_points.txt +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd.egg-info/requires.txt +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd.egg-info/top_level.txt +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_modelfactory/__init__.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_models/__init__.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_models/constants.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_models/mets-empty.xml +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_models/ocrd_agent.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_models/ocrd_exif.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_models/ocrd_file.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_models/ocrd_mets.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_models/ocrd_page.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_models/ocrd_xml_base.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_models/report.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_models/utils.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_models/xpath_functions.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/__init__.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/cli/__init__.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/cli/client.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/cli/processing_server.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/cli/processing_worker.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/cli/processor_server.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/client.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/client_utils.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/constants.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/database.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/logging_utils.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/models/__init__.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/models/job.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/models/messages.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/models/ocrd_tool.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/models/workflow.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/models/workspace.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/param_validators.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/process_helpers.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/processing_server.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/processing_worker.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/processor_server.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/rabbitmq_utils/__init__.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/rabbitmq_utils/connector.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/rabbitmq_utils/constants.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/rabbitmq_utils/consumer.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/rabbitmq_utils/helpers.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/rabbitmq_utils/ocrd_messages.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/rabbitmq_utils/publisher.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/runtime_data/__init__.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/runtime_data/config_parser.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/runtime_data/connection_clients.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/runtime_data/deployer.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/runtime_data/hosts.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/runtime_data/network_agents.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/runtime_data/network_services.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/server_cache.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/server_utils.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/tcp_to_uds_mets_proxy.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_network/utils.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_utils/__init__.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_utils/config.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_utils/constants.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_utils/deprecate.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_utils/image.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_utils/introspect.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_utils/logging.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_utils/ocrd_logging.conf +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_utils/os.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_utils/str.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/__init__.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/bagit-profile.yml +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/constants.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/json_validator.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/message_processing.schema.yml +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/message_result.schema.yml +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/mets.xsd +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/ocrd_network_message_validator.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/ocrd_tool.schema.yml +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/ocrd_tool_validator.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/ocrd_zip_validator.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/page.xsd +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/page_validator.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/parameter_validator.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/processing_server_config.schema.yml +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/processing_server_config_validator.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/resource_list_validator.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/workspace_validator.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/xlink.xsd +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/xsd_mets_validator.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/xsd_page_validator.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/src/ocrd_validators/xsd_validator.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/tests/test_decorators.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/tests/test_logging.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/tests/test_logging_conf.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/tests/test_model_factory.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/tests/test_resolver.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/tests/test_resolver_oai.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/tests/test_resource_manager.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/tests/test_task_sequence.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/tests/test_utils.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/tests/test_version.py +0 -0
- {ocrd-3.0.2 → ocrd-3.0.4}/tests/test_workspace_remove.py +0 -0
ocrd-3.0.4/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.0.4
|
|
@@ -474,7 +474,7 @@ class OcrdMetsServer:
|
|
|
474
474
|
"""
|
|
475
475
|
workspace.save_mets()
|
|
476
476
|
response = Response(content="The Mets Server is writing changes to disk.", media_type='text/plain')
|
|
477
|
-
self.log.
|
|
477
|
+
self.log.debug(f"PUT / -> {response.__dict__}")
|
|
478
478
|
return response
|
|
479
479
|
|
|
480
480
|
@app.delete(path='/')
|
|
@@ -485,7 +485,7 @@ class OcrdMetsServer:
|
|
|
485
485
|
workspace.save_mets()
|
|
486
486
|
response = Response(content="The Mets Server will shut down soon...", media_type='text/plain')
|
|
487
487
|
self.shutdown()
|
|
488
|
-
self.log.
|
|
488
|
+
self.log.debug(f"DELETE / -> {response.__dict__}")
|
|
489
489
|
return response
|
|
490
490
|
|
|
491
491
|
@app.post(path='/reload')
|
|
@@ -495,25 +495,25 @@ class OcrdMetsServer:
|
|
|
495
495
|
"""
|
|
496
496
|
workspace.reload_mets()
|
|
497
497
|
response = Response(content=f"Reloaded from {workspace.directory}", media_type='text/plain')
|
|
498
|
-
self.log.
|
|
498
|
+
self.log.debug(f"POST /reload -> {response.__dict__}")
|
|
499
499
|
return response
|
|
500
500
|
|
|
501
501
|
@app.get(path='/unique_identifier', response_model=str)
|
|
502
502
|
async def unique_identifier():
|
|
503
503
|
response = Response(content=workspace.mets.unique_identifier, media_type='text/plain')
|
|
504
|
-
self.log.
|
|
504
|
+
self.log.debug(f"GET /unique_identifier -> {response.__dict__}")
|
|
505
505
|
return response
|
|
506
506
|
|
|
507
507
|
@app.get(path='/workspace_path', response_model=str)
|
|
508
508
|
async def workspace_path():
|
|
509
509
|
response = Response(content=workspace.directory, media_type="text/plain")
|
|
510
|
-
self.log.
|
|
510
|
+
self.log.debug(f"GET /workspace_path -> {response.__dict__}")
|
|
511
511
|
return response
|
|
512
512
|
|
|
513
513
|
@app.get(path='/physical_pages', response_model=OcrdPageListModel)
|
|
514
514
|
async def physical_pages():
|
|
515
515
|
response = {'physical_pages': workspace.mets.physical_pages}
|
|
516
|
-
self.log.
|
|
516
|
+
self.log.debug(f"GET /physical_pages -> {response}")
|
|
517
517
|
return response
|
|
518
518
|
|
|
519
519
|
@app.get(path='/physical_pages', response_model=OcrdPageListModel)
|
|
@@ -523,13 +523,13 @@ class OcrdMetsServer:
|
|
|
523
523
|
@app.get(path='/file_groups', response_model=OcrdFileGroupListModel)
|
|
524
524
|
async def file_groups():
|
|
525
525
|
response = {'file_groups': workspace.mets.file_groups}
|
|
526
|
-
self.log.
|
|
526
|
+
self.log.debug(f"GET /file_groups -> {response}")
|
|
527
527
|
return response
|
|
528
528
|
|
|
529
529
|
@app.get(path='/agent', response_model=OcrdAgentListModel)
|
|
530
530
|
async def agents():
|
|
531
531
|
response = OcrdAgentListModel.create(workspace.mets.agents)
|
|
532
|
-
self.log.
|
|
532
|
+
self.log.debug(f"GET /agent -> {response.__dict__}")
|
|
533
533
|
return response
|
|
534
534
|
|
|
535
535
|
@app.post(path='/agent', response_model=OcrdAgentModel)
|
|
@@ -538,7 +538,7 @@ class OcrdMetsServer:
|
|
|
538
538
|
kwargs['_type'] = kwargs.pop('type')
|
|
539
539
|
workspace.mets.add_agent(**kwargs)
|
|
540
540
|
response = agent
|
|
541
|
-
self.log.
|
|
541
|
+
self.log.debug(f"POST /agent -> {response.__dict__}")
|
|
542
542
|
return response
|
|
543
543
|
|
|
544
544
|
@app.get(path="/file", response_model=OcrdFileListModel)
|
|
@@ -557,14 +557,14 @@ class OcrdMetsServer:
|
|
|
557
557
|
fileGrp=file_grp, ID=file_id, pageId=page_id, mimetype=mimetype, local_filename=local_filename, url=url
|
|
558
558
|
)
|
|
559
559
|
response = OcrdFileListModel.create(found)
|
|
560
|
-
self.log.
|
|
560
|
+
self.log.debug(f"GET /file -> {response.__dict__}")
|
|
561
561
|
return response
|
|
562
562
|
|
|
563
563
|
@app.post(path='/file', response_model=OcrdFileModel)
|
|
564
564
|
async def add_file(
|
|
565
565
|
file_grp: str = Form(),
|
|
566
566
|
file_id: str = Form(),
|
|
567
|
-
page_id: Optional[str] = Form(),
|
|
567
|
+
page_id: Optional[str] = Form(None),
|
|
568
568
|
mimetype: str = Form(),
|
|
569
569
|
url: Optional[str] = Form(None),
|
|
570
570
|
local_filename: Optional[str] = Form(None),
|
|
@@ -582,7 +582,7 @@ class OcrdMetsServer:
|
|
|
582
582
|
kwargs = file_resource.dict()
|
|
583
583
|
workspace.add_file(**kwargs, force=force)
|
|
584
584
|
response = file_resource
|
|
585
|
-
self.log.
|
|
585
|
+
self.log.debug(f"POST /file -> {response.__dict__}")
|
|
586
586
|
return response
|
|
587
587
|
|
|
588
588
|
# ------------- #
|
|
@@ -534,15 +534,20 @@ class Processor():
|
|
|
534
534
|
# forward messages from log queue (in subprocesses) to all root handlers
|
|
535
535
|
log_listener = logging.handlers.QueueListener(log_queue, *logging.root.handlers, respect_handler_level=True)
|
|
536
536
|
log_listener.start()
|
|
537
|
+
tasks = None
|
|
537
538
|
try:
|
|
538
539
|
self._base_logger.debug("started executor %s with %d workers", str(executor), max_workers or 1)
|
|
539
540
|
tasks = self.process_workspace_submit_tasks(executor, max_seconds)
|
|
540
541
|
stats = self.process_workspace_handle_tasks(tasks)
|
|
541
542
|
finally:
|
|
542
543
|
executor.shutdown(kill_workers=True, wait=False)
|
|
544
|
+
self._base_logger.debug("stopped executor %s after %d tasks", str(executor), len(tasks) if tasks else -1)
|
|
543
545
|
if max_workers > 1:
|
|
544
|
-
|
|
545
|
-
|
|
546
|
+
# can cause deadlock:
|
|
547
|
+
#log_listener.stop()
|
|
548
|
+
# not much better:
|
|
549
|
+
#log_listener.enqueue_sentinel()
|
|
550
|
+
pass
|
|
546
551
|
|
|
547
552
|
except NotImplementedError:
|
|
548
553
|
# fall back to deprecated method
|
|
@@ -670,11 +675,12 @@ class Processor():
|
|
|
670
675
|
nr_succeeded += 1
|
|
671
676
|
# else skipped - already exists
|
|
672
677
|
nr_errors = dict(nr_errors)
|
|
678
|
+
nr_all = nr_succeeded + nr_failed
|
|
673
679
|
if nr_failed > 0:
|
|
674
|
-
nr_all = nr_succeeded + nr_failed
|
|
675
680
|
if config.OCRD_MAX_MISSING_OUTPUTS > 0 and nr_failed / nr_all > config.OCRD_MAX_MISSING_OUTPUTS:
|
|
676
681
|
raise Exception(f"too many failures with {reason} output ({nr_failed} of {nr_all}, {str(nr_errors)})")
|
|
677
682
|
self._base_logger.warning("%s %d of %d pages due to %s", reason, nr_failed, nr_all, str(nr_errors))
|
|
683
|
+
self._base_logger.debug("succeeded %d, missed %d of %d pages due to %s", nr_succeeded, nr_failed, nr_all, str(nr_errors))
|
|
678
684
|
return nr_succeeded, nr_failed, nr_errors, len(tasks)
|
|
679
685
|
|
|
680
686
|
def process_workspace_handle_page_task(self, page_id : str, input_files : List[Optional[OcrdFileType]], task : TFuture) -> Union[bool, Exception]:
|
|
@@ -56,13 +56,19 @@ def run_processor(
|
|
|
56
56
|
- :py:attr:`output_file_grp`
|
|
57
57
|
- :py:attr:`parameter` (after applying any :py:attr:`parameter_override` settings)
|
|
58
58
|
|
|
59
|
-
Warning: Avoid setting the `instance_caching` flag to True. It may have unexpected side effects.
|
|
60
|
-
This flag is used for an experimental feature we would like to adopt in future.
|
|
61
|
-
|
|
62
59
|
Run the processor on the workspace (creating output files in the filesystem).
|
|
63
60
|
|
|
64
61
|
Finally, write back the workspace (updating the METS in the filesystem).
|
|
65
62
|
|
|
63
|
+
If :py:attr:`instance_caching` is True, then processor instances (for the same set
|
|
64
|
+
of :py:attr:`parameter` values) will be cached internally. Thus, these objects (and
|
|
65
|
+
all their memory resources, like loaded models) get re-used instead of re-instantiated
|
|
66
|
+
when a match occurs - as long as the program is being run. They only get deleted (and
|
|
67
|
+
their resources freed) when as many as :py:data:`~ocrd_utils.config.OCRD_MAX_PROCESSOR_CACHE`
|
|
68
|
+
instances have already been cached while this particular parameter set was re-used
|
|
69
|
+
least frequently. (See :py:class:`~ocrd_network.ProcessingWorker` and
|
|
70
|
+
:py:class:`~ocrd_network.ProcessorServer` for use-cases.)
|
|
71
|
+
|
|
66
72
|
Args:
|
|
67
73
|
processorClass (object): Python class of the module processor.
|
|
68
74
|
"""
|
|
@@ -137,7 +143,7 @@ def run_processor(
|
|
|
137
143
|
t1_cpu,
|
|
138
144
|
processor.input_file_grp or '',
|
|
139
145
|
processor.output_file_grp or '',
|
|
140
|
-
json.dumps(processor.parameter
|
|
146
|
+
json.dumps(dict(processor.parameter or {})),
|
|
141
147
|
processor.page_id or ''
|
|
142
148
|
)
|
|
143
149
|
workspace.mets.add_agent(
|
|
@@ -148,7 +154,7 @@ def run_processor(
|
|
|
148
154
|
otherrole=otherrole,
|
|
149
155
|
notes=[({'option': 'input-file-grp'}, processor.input_file_grp or ''),
|
|
150
156
|
({'option': 'output-file-grp'}, processor.output_file_grp or ''),
|
|
151
|
-
({'option': 'parameter'}, json.dumps(processor.parameter or
|
|
157
|
+
({'option': 'parameter'}, json.dumps(dict(processor.parameter or {}))),
|
|
152
158
|
({'option': 'page-id'}, processor.page_id or '')]
|
|
153
159
|
)
|
|
154
160
|
workspace.save_mets()
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
3
|
|
|
4
4
|
#
|
|
5
|
-
# Generated
|
|
5
|
+
# Generated Mon Feb 17 10:32:54 2025 by generateDS.py version 2.44.1.
|
|
6
6
|
# Python 3.8.17+ (heads/3.8-dirty:1663f8ba84, Aug 15 2023, 18:13:01) [GCC 8.3.0]
|
|
7
7
|
#
|
|
8
8
|
# Command line options:
|
|
@@ -7112,7 +7112,7 @@ class OrderedGroupIndexedType(GeneratedsSuper):
|
|
|
7112
7112
|
else:
|
|
7113
7113
|
cleaned.append(entry)
|
|
7114
7114
|
for entry in cleaned:
|
|
7115
|
-
entry.export(outfile, level,
|
|
7115
|
+
entry.export(outfile, level, namespaceprefix_, namespacedef_='', name_=entry.__class__.__name__[:-4], pretty_print=pretty_print)
|
|
7116
7116
|
# end class OrderedGroupIndexedType
|
|
7117
7117
|
|
|
7118
7118
|
|
|
@@ -8075,7 +8075,7 @@ class OrderedGroupType(GeneratedsSuper):
|
|
|
8075
8075
|
else:
|
|
8076
8076
|
cleaned.append(entry)
|
|
8077
8077
|
for entry in cleaned:
|
|
8078
|
-
entry.export(outfile, level,
|
|
8078
|
+
entry.export(outfile, level, namespaceprefix_, namespacedef_='', name_=entry.__class__.__name__[:-4], pretty_print=pretty_print)
|
|
8079
8079
|
# end class OrderedGroupType
|
|
8080
8080
|
|
|
8081
8081
|
|
|
@@ -240,7 +240,7 @@ def test_find_all_files(start_mets_server : Tuple[str, Workspace]):
|
|
|
240
240
|
assert len(mets.find_all_files(mimetype='//application/.*')) == 22, '22 application/.*'
|
|
241
241
|
assert len(mets.find_all_files(mimetype=MIMETYPE_PAGE)) == 20, '20 ' + MIMETYPE_PAGE
|
|
242
242
|
assert len(mets.find_all_files(local_filename='OCR-D-IMG/FILE_0005_IMAGE.tif')) == 1, '1 FILE xlink:href="OCR-D-IMG/FILE_0005_IMAGE.tif"'
|
|
243
|
-
assert len(mets.find_all_files(url='https://github.com/OCR-D/assets/raw/master/data/SBB0000F29300010000/00000001_DESKEW.tif')) == 1, '1 URL xlink:href="https://github.com/OCR-D/assets/raw/master/data/SBB0000F29300010000/00000001_DESKEW.tif"'
|
|
243
|
+
assert len(mets.find_all_files(url='https://github.com/OCR-D/assets/raw/master/data/SBB0000F29300010000/data/00000001_DESKEW.tif')) == 1, '1 URL xlink:href="https://github.com/OCR-D/assets/raw/master/data/SBB0000F29300010000/data/00000001_DESKEW.tif"'
|
|
244
244
|
assert len(mets.find_all_files(pageId='PHYS_0001..PHYS_0005')) == 35, '35 files for page "PHYS_0001..PHYS_0005"'
|
|
245
245
|
assert len(mets.find_all_files(pageId='//PHYS_000(1|2)')) == 34, '34 files in PHYS_001 and PHYS_0002'
|
|
246
246
|
assert len(mets.find_all_files(pageId='//PHYS_0001,//PHYS_0005')) == 18, '18 files in PHYS_001 and PHYS_0005 (two regexes)'
|
|
@@ -224,7 +224,7 @@ def test_superfluous_copies_in_ws_dir(tmp_path):
|
|
|
224
224
|
https://github.com/OCR-D/core/issues/227
|
|
225
225
|
"""
|
|
226
226
|
# arrange
|
|
227
|
-
src_path = assets.path_to('
|
|
227
|
+
src_path = assets.path_to('sample_bagit-with-fetch/data/PPN595930174.xml')
|
|
228
228
|
dst_path = join(tmp_path, 'mets.xml')
|
|
229
229
|
copyfile(src_path, dst_path)
|
|
230
230
|
ws1 = Workspace(Resolver(), tmp_path)
|
|
@@ -233,12 +233,12 @@ def test_superfluous_copies_in_ws_dir(tmp_path):
|
|
|
233
233
|
assert count_files(tmp_path) == 1
|
|
234
234
|
|
|
235
235
|
# act
|
|
236
|
-
|
|
237
|
-
|
|
236
|
+
file1 = next(ws1.mets.find_files(fileGrp='DEFAULT'))
|
|
237
|
+
ws1.download_file(file1)
|
|
238
238
|
|
|
239
239
|
# assert
|
|
240
240
|
assert count_files(tmp_path) == 2
|
|
241
|
-
assert exists(join(tmp_path, '
|
|
241
|
+
assert exists(join(tmp_path, 'DEFAULT/FILE_0000_DEFAULT.jpg'))
|
|
242
242
|
|
|
243
243
|
|
|
244
244
|
@pytest.fixture(name='sbb_data_tmp')
|
ocrd-3.0.2/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
3.0.2
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|