ocrd 3.3.2__tar.gz → 3.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ocrd-3.3.2/src/ocrd.egg-info → ocrd-3.4.1}/PKG-INFO +1 -1
- ocrd-3.4.1/VERSION +1 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/cli/__init__.py +6 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/cli/ocrd_tool.py +9 -1
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/decorators/ocrd_cli_options.py +1 -1
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/mets_server.py +2 -2
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/processor/base.py +21 -13
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/workspace.py +33 -48
- {ocrd-3.3.2 → ocrd-3.4.1/src/ocrd.egg-info}/PKG-INFO +1 -1
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/constants.py +9 -5
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/logging_utils.py +12 -1
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_utils/config.py +12 -2
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_utils/logging.py +10 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_utils/os.py +1 -1
- ocrd-3.3.2/VERSION +0 -1
- {ocrd-3.3.2 → ocrd-3.4.1}/LICENSE +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/MANIFEST.in +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/README.md +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/README_bashlib.md +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/README_ocrd.md +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/README_ocrd_modelfactory.md +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/README_ocrd_models.md +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/README_ocrd_network.md +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/README_ocrd_utils.md +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/README_ocrd_validators.md +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/pyproject.toml +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/requirements.txt +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/setup.cfg +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/__init__.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/cli/bashlib.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/cli/log.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/cli/network.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/cli/process.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/cli/resmgr.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/cli/validate.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/cli/workspace.py +1 -1
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/cli/zip.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/constants.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/decorators/__init__.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/decorators/loglevel_option.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/decorators/mets_find_options.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/decorators/parameter_option.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/lib.bash +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/ocrd-all-tool.json +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/processor/__init__.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/processor/builtin/__init__.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/processor/builtin/dummy/__init__.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/processor/builtin/dummy/ocrd-tool.json +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/processor/builtin/dummy_processor.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/processor/builtin/filter_processor.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/processor/helpers.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/processor/ocrd_page_result.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/resolver.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/resource_list.yml +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/resource_manager.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/task_sequence.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/workspace_backup.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd/workspace_bagger.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd.egg-info/SOURCES.txt +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd.egg-info/dependency_links.txt +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd.egg-info/entry_points.txt +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd.egg-info/requires.txt +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd.egg-info/top_level.txt +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_modelfactory/__init__.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_models/__init__.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_models/constants.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_models/mets-empty.xml +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_models/ocrd_agent.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_models/ocrd_exif.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_models/ocrd_file.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_models/ocrd_mets.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_models/ocrd_page.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_models/ocrd_page_generateds.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_models/ocrd_xml_base.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_models/report.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_models/utils.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_models/xpath_functions.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/__init__.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/cli/__init__.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/cli/client.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/cli/processing_server.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/cli/processing_worker.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/cli/processor_server.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/client.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/client_utils.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/database.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/models/__init__.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/models/job.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/models/messages.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/models/ocrd_tool.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/models/workflow.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/models/workspace.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/param_validators.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/process_helpers.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/processing_server.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/processing_worker.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/processor_server.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/rabbitmq_utils/__init__.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/rabbitmq_utils/connector.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/rabbitmq_utils/constants.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/rabbitmq_utils/consumer.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/rabbitmq_utils/helpers.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/rabbitmq_utils/ocrd_messages.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/rabbitmq_utils/publisher.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/runtime_data/__init__.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/runtime_data/config_parser.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/runtime_data/connection_clients.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/runtime_data/deployer.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/runtime_data/hosts.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/runtime_data/network_agents.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/runtime_data/network_services.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/server_cache.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/server_utils.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/tcp_to_uds_mets_proxy.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_network/utils.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_utils/__init__.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_utils/constants.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_utils/deprecate.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_utils/image.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_utils/introspect.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_utils/ocrd_logging.conf +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_utils/str.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/__init__.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/bagit-profile.yml +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/constants.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/json_validator.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/message_processing.schema.yml +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/message_result.schema.yml +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/mets.xsd +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/ocrd_network_message_validator.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/ocrd_tool.schema.yml +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/ocrd_tool_validator.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/ocrd_zip_validator.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/page.xsd +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/page_validator.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/parameter_validator.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/processing_server_config.schema.yml +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/processing_server_config_validator.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/resource_list_validator.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/workspace_validator.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/xlink.xsd +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/xsd_mets_validator.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/xsd_page_validator.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/src/ocrd_validators/xsd_validator.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/tests/test_decorators.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/tests/test_logging.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/tests/test_logging_conf.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/tests/test_mets_server.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/tests/test_model_factory.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/tests/test_resolver.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/tests/test_resolver_oai.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/tests/test_resource_manager.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/tests/test_task_sequence.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/tests/test_utils.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/tests/test_version.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/tests/test_workspace.py +0 -0
- {ocrd-3.3.2 → ocrd-3.4.1}/tests/test_workspace_remove.py +0 -0
ocrd-3.4.1/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.4.1
|
|
@@ -67,6 +67,12 @@ Variables:
|
|
|
67
67
|
\b
|
|
68
68
|
{config.describe('OCRD_EXISTING_OUTPUT', wrap_text=False)}
|
|
69
69
|
\b
|
|
70
|
+
{config.describe('OCRD_MAX_MISSING_OUTPUTS')}
|
|
71
|
+
\b
|
|
72
|
+
{config.describe('OCRD_MAX_PARALLEL_PAGES')}
|
|
73
|
+
\b
|
|
74
|
+
{config.describe('OCRD_PROCESSING_PAGE_TIMEOUT')}
|
|
75
|
+
\b
|
|
70
76
|
{config.describe('OCRD_METS_CACHING')}
|
|
71
77
|
\b
|
|
72
78
|
{config.describe('OCRD_MAX_PROCESSOR_CACHE')}
|
|
@@ -18,7 +18,8 @@ from ocrd.processor import Processor
|
|
|
18
18
|
from ocrd_utils import (
|
|
19
19
|
set_json_key_value_overrides,
|
|
20
20
|
parse_json_string_or_file,
|
|
21
|
-
parse_json_string_with_comments as loads
|
|
21
|
+
parse_json_string_with_comments as loads,
|
|
22
|
+
get_moduledir
|
|
22
23
|
)
|
|
23
24
|
from ocrd_validators import ParameterValidator, OcrdToolValidator
|
|
24
25
|
|
|
@@ -104,6 +105,13 @@ def ocrd_tool_list(ctx):
|
|
|
104
105
|
def ocrd_tool_dump(ctx):
|
|
105
106
|
print(dumps(ctx.json['tools'], indent=True))
|
|
106
107
|
|
|
108
|
+
@ocrd_tool_cli.command('dump-module-dirs', help="Dump module directory of each tool")
|
|
109
|
+
@pass_ocrd_tool
|
|
110
|
+
def ocrd_tool_dump_module_dirs(ctx):
|
|
111
|
+
print(dumps({tool_name: get_moduledir(tool_name)
|
|
112
|
+
for tool_name in ctx.json['tools']},
|
|
113
|
+
indent=True))
|
|
114
|
+
|
|
107
115
|
# ----------------------------------------------------------------------
|
|
108
116
|
# ocrd ocrd-tool tool
|
|
109
117
|
# ----------------------------------------------------------------------
|
|
@@ -56,7 +56,7 @@ def ocrd_cli_options(f):
|
|
|
56
56
|
# subcommands. So we have to work around that by creating a
|
|
57
57
|
# pseudo-subcommand handled in ocrd_cli_wrap_processor
|
|
58
58
|
argument('subcommand', nargs=1, required=False,
|
|
59
|
-
type=click.Choice(
|
|
59
|
+
type=click.Choice(list(map(str, AgentType)))),
|
|
60
60
|
]
|
|
61
61
|
for param in params:
|
|
62
62
|
param(f)
|
|
@@ -46,7 +46,7 @@ class OcrdFileModel(BaseModel):
|
|
|
46
46
|
):
|
|
47
47
|
return OcrdFileModel(
|
|
48
48
|
file_grp=file_grp, file_id=file_id, page_id=page_id, mimetype=mimetype, url=url,
|
|
49
|
-
local_filename=str(local_filename)
|
|
49
|
+
local_filename=str(local_filename) if local_filename else None
|
|
50
50
|
)
|
|
51
51
|
|
|
52
52
|
|
|
@@ -314,7 +314,7 @@ class ClientSideOcrdMets:
|
|
|
314
314
|
else:
|
|
315
315
|
r = self.session.request("POST", self.url, json=MpxReq.add_file(self.ws_dir_path, kwargs))
|
|
316
316
|
if not r.ok:
|
|
317
|
-
raise RuntimeError(f"Failed to add file ({str(data)}): {r.json()
|
|
317
|
+
raise RuntimeError(f"Failed to add file ({str(data)}): {r.json()}")
|
|
318
318
|
|
|
319
319
|
return ClientSideOcrdFile(
|
|
320
320
|
None, fileGrp=file_grp,
|
|
@@ -29,8 +29,7 @@ from frozendict import frozendict
|
|
|
29
29
|
# this is where the fixes came from:
|
|
30
30
|
from loky import Future, ProcessPoolExecutor
|
|
31
31
|
import multiprocessing as mp
|
|
32
|
-
from
|
|
33
|
-
from _thread import interrupt_main
|
|
32
|
+
from multiprocessing.pool import ThreadPool
|
|
34
33
|
|
|
35
34
|
from click import wrap_text
|
|
36
35
|
from deprecated import deprecated
|
|
@@ -783,11 +782,16 @@ class Processor():
|
|
|
783
782
|
page_id = input_files[input_pos].pageId
|
|
784
783
|
self._base_logger.info("processing page %s", page_id)
|
|
785
784
|
for i, input_file in enumerate(input_files):
|
|
785
|
+
grp = self.input_file_grp.split(',')[i]
|
|
786
786
|
if input_file is None:
|
|
787
|
-
grp = self.input_file_grp.split(',')[i]
|
|
788
787
|
self._base_logger.debug(f"ignoring missing file for input fileGrp {grp} for page {page_id}")
|
|
789
788
|
continue
|
|
790
789
|
assert isinstance(input_file, get_args(OcrdFileType))
|
|
790
|
+
if not input_file.local_filename:
|
|
791
|
+
self._base_logger.error(f'No local file exists for page {page_id} in file group {grp}')
|
|
792
|
+
if config.OCRD_MISSING_INPUT == 'ABORT':
|
|
793
|
+
raise MissingInputFile(grp, page_id, input_file.mimetype)
|
|
794
|
+
continue
|
|
791
795
|
self._base_logger.debug(f"parsing file {input_file.ID} for page {page_id}")
|
|
792
796
|
try:
|
|
793
797
|
page_ = page_from_file(input_file)
|
|
@@ -796,6 +800,9 @@ class Processor():
|
|
|
796
800
|
except ValueError as err:
|
|
797
801
|
# not PAGE and not an image to generate PAGE for
|
|
798
802
|
self._base_logger.error(f"non-PAGE input for page {page_id}: {err}")
|
|
803
|
+
if not any(input_pcgts):
|
|
804
|
+
self._base_logger.warning(f'skipping page {page_id}')
|
|
805
|
+
return
|
|
799
806
|
output_file_id = make_file_id(input_files[input_pos], self.output_file_grp)
|
|
800
807
|
if input_files[input_pos].fileGrp == self.output_file_grp:
|
|
801
808
|
# input=output fileGrp: re-use ID exactly
|
|
@@ -1107,7 +1114,11 @@ class Processor():
|
|
|
1107
1114
|
self._base_logger.critical(f"Could not find any files for selected pageId {self.page_id}.\n"
|
|
1108
1115
|
f"compare '{self.page_id}' with the output of 'orcd workspace list-page'.")
|
|
1109
1116
|
ifts = []
|
|
1110
|
-
|
|
1117
|
+
# use physical page order
|
|
1118
|
+
for page in self.workspace.mets.physical_pages:
|
|
1119
|
+
if page not in pages:
|
|
1120
|
+
continue
|
|
1121
|
+
ifiles = pages[page]
|
|
1111
1122
|
for i, ifg in enumerate(ifgs):
|
|
1112
1123
|
if not ifiles[i]:
|
|
1113
1124
|
# could be from non-unique with on_error=skip or from true gap
|
|
@@ -1150,18 +1161,15 @@ def _page_worker(timeout, *input_files):
|
|
|
1150
1161
|
"""
|
|
1151
1162
|
page_id = next((file.pageId for file in input_files
|
|
1152
1163
|
if hasattr(file, 'pageId')), "")
|
|
1153
|
-
|
|
1154
|
-
timer = Timer(timeout, interrupt_main)
|
|
1155
|
-
timer.start()
|
|
1164
|
+
pool = ThreadPool(processes=1)
|
|
1156
1165
|
try:
|
|
1157
|
-
_page_worker_processor.process_page_file(*input_files)
|
|
1166
|
+
#_page_worker_processor.process_page_file(*input_files)
|
|
1167
|
+
async_result = pool.apply_async(_page_worker_processor.process_page_file, input_files)
|
|
1168
|
+
async_result.get(timeout or None)
|
|
1158
1169
|
_page_worker_processor.logger.debug("page worker completed for page %s", page_id)
|
|
1159
|
-
except
|
|
1170
|
+
except mp.TimeoutError:
|
|
1160
1171
|
_page_worker_processor.logger.debug("page worker timed out for page %s", page_id)
|
|
1161
|
-
raise
|
|
1162
|
-
finally:
|
|
1163
|
-
if timeout > 0:
|
|
1164
|
-
timer.cancel()
|
|
1172
|
+
raise
|
|
1165
1173
|
|
|
1166
1174
|
def generate_processor_help(ocrd_tool, processor_instance=None, subcommand=None):
|
|
1167
1175
|
"""Generate a string describing the full CLI of this processor including params.
|
|
@@ -5,7 +5,7 @@ from shutil import copyfileobj
|
|
|
5
5
|
from re import sub
|
|
6
6
|
from tempfile import NamedTemporaryFile
|
|
7
7
|
from contextlib import contextmanager
|
|
8
|
-
from typing import Optional, Union
|
|
8
|
+
from typing import Optional, Union, Callable
|
|
9
9
|
|
|
10
10
|
from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, cvtColor
|
|
11
11
|
from PIL import Image
|
|
@@ -457,6 +457,20 @@ class Workspace():
|
|
|
457
457
|
with atomic_write(self.mets_target) as f:
|
|
458
458
|
f.write(self.mets.to_xml(xmllint=True).decode('utf-8'))
|
|
459
459
|
|
|
460
|
+
def _apply_mets_file(self, filename_or_url: str, fun: Callable):
|
|
461
|
+
if not filename_or_url:
|
|
462
|
+
# avoid "finding" just any file
|
|
463
|
+
raise ValueError("requires non-empty filename or URL")
|
|
464
|
+
with pushd_popd(self.directory):
|
|
465
|
+
if Path(filename_or_url).exists():
|
|
466
|
+
return fun(filename_or_url)
|
|
467
|
+
if image_file := next(self.mets.find_files(local_filename=str(filename_or_url)), None):
|
|
468
|
+
return fun(image_file.local_filename)
|
|
469
|
+
if image_file := next(self.mets.find_files(url=str(filename_or_url)), None):
|
|
470
|
+
return fun(self.download_file(image_file).local_filename)
|
|
471
|
+
with download_temporary_file(filename_or_url) as f:
|
|
472
|
+
return fun(f.name)
|
|
473
|
+
|
|
460
474
|
def resolve_image_exif(self, image_url):
|
|
461
475
|
"""
|
|
462
476
|
Get the EXIF metadata about an image URL as :py:class:`ocrd_models.ocrd_exif.OcrdExif`
|
|
@@ -467,19 +481,7 @@ class Workspace():
|
|
|
467
481
|
Returns:
|
|
468
482
|
:py:class:`ocrd_models.ocrd_exif.OcrdExif`
|
|
469
483
|
"""
|
|
470
|
-
|
|
471
|
-
# avoid "finding" just any file
|
|
472
|
-
raise ValueError(f"'image_url' must be a non-empty string, not '{image_url}' ({type(image_url)})")
|
|
473
|
-
try:
|
|
474
|
-
f = next(self.mets.find_files(local_filename=str(image_url)))
|
|
475
|
-
return exif_from_filename(f.local_filename)
|
|
476
|
-
except StopIteration:
|
|
477
|
-
try:
|
|
478
|
-
f = next(self.mets.find_files(url=str(image_url)))
|
|
479
|
-
return exif_from_filename(self.download_file(f).local_filename)
|
|
480
|
-
except StopIteration:
|
|
481
|
-
with download_temporary_file(image_url) as f:
|
|
482
|
-
return exif_from_filename(f.name)
|
|
484
|
+
return self._apply_mets_file(image_url, exif_from_filename)
|
|
483
485
|
|
|
484
486
|
@deprecated(version='1.0.0', reason="Use workspace.image_from_page and workspace.image_from_segment")
|
|
485
487
|
def resolve_image_as_pil(self, image_url, coords=None):
|
|
@@ -498,22 +500,9 @@ class Workspace():
|
|
|
498
500
|
return self._resolve_image_as_pil(image_url, coords)
|
|
499
501
|
|
|
500
502
|
def _resolve_image_as_pil(self, image_url, coords=None):
|
|
501
|
-
if not image_url:
|
|
502
|
-
# avoid "finding" just any file
|
|
503
|
-
raise Exception("Cannot resolve empty image path")
|
|
504
503
|
log = getLogger('ocrd.workspace._resolve_image_as_pil')
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
f = next(self.mets.find_files(local_filename=str(image_url)))
|
|
508
|
-
pil_image = Image.open(f.local_filename)
|
|
509
|
-
except StopIteration:
|
|
510
|
-
try:
|
|
511
|
-
f = next(self.mets.find_files(url=str(image_url)))
|
|
512
|
-
pil_image = Image.open(self.download_file(f).local_filename)
|
|
513
|
-
except StopIteration:
|
|
514
|
-
with download_temporary_file(image_url) as f:
|
|
515
|
-
pil_image = Image.open(f.name)
|
|
516
|
-
pil_image.load() # alloc and give up the FD
|
|
504
|
+
pil_image = self._apply_mets_file(image_url, Image.open)
|
|
505
|
+
pil_image.load() # alloc and give up the FD
|
|
517
506
|
|
|
518
507
|
# Pillow does not properly support higher color depths
|
|
519
508
|
# (e.g. 16-bit or 32-bit or floating point grayscale),
|
|
@@ -788,16 +777,14 @@ class Workspace():
|
|
|
788
777
|
raise Exception('Found no AlternativeImage that satisfies all requirements ' +
|
|
789
778
|
'filename="%s" in page "%s"' % (
|
|
790
779
|
filename, page_id))
|
|
791
|
-
if not all(feature in page_coords['features']
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
'filter="%s" in page "%s"' % (
|
|
800
|
-
feature_filter, page_id))
|
|
780
|
+
if (not all(feature in page_coords['features']
|
|
781
|
+
for feature in feature_selector.split(',') if feature) or
|
|
782
|
+
any(feature in page_coords['features']
|
|
783
|
+
for feature in feature_filter.split(',') if feature)):
|
|
784
|
+
raise Exception('Found no AlternativeImage that satisfies all requirements' +
|
|
785
|
+
' selector="%s"' % feature_selector +
|
|
786
|
+
' filter="%s"' % feature_filter +
|
|
787
|
+
' in page "%s"' % page_id)
|
|
801
788
|
# ensure DPI will be set in image meta-data again
|
|
802
789
|
if 'DPI' in page_coords:
|
|
803
790
|
dpi = page_coords['DPI']
|
|
@@ -1049,16 +1036,14 @@ class Workspace():
|
|
|
1049
1036
|
raise Exception('Found no AlternativeImage that satisfies all requirements ' +
|
|
1050
1037
|
'filename="%s" in segment "%s"' % (
|
|
1051
1038
|
filename, segment.id))
|
|
1052
|
-
if not all(feature in segment_coords['features']
|
|
1053
|
-
|
|
1039
|
+
if (not all(feature in segment_coords['features']
|
|
1040
|
+
for feature in feature_selector.split(',') if feature) or
|
|
1041
|
+
any(feature in segment_coords['features']
|
|
1042
|
+
for feature in feature_filter.split(',') if feature)):
|
|
1054
1043
|
raise Exception('Found no AlternativeImage that satisfies all requirements' +
|
|
1055
|
-
'selector="%s"
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
for feature in feature_filter.split(',') if feature):
|
|
1059
|
-
raise Exception('Found no AlternativeImage that satisfies all requirements ' +
|
|
1060
|
-
'filter="%s" in segment "%s"' % (
|
|
1061
|
-
feature_filter, segment.id))
|
|
1044
|
+
' selector="%s"' % feature_selector +
|
|
1045
|
+
' filter="%s"' % feature_filter +
|
|
1046
|
+
' in segment "%s"' % segment.id)
|
|
1062
1047
|
# ensure DPI will be set in image meta-data again
|
|
1063
1048
|
if 'DPI' in segment_coords:
|
|
1064
1049
|
dpi = segment_coords['DPI']
|
|
@@ -11,12 +11,16 @@ OCRD_ALL_TOOL_JSON = "ocrd-all-tool.json"
|
|
|
11
11
|
SERVER_ALL_PAGES_PLACEHOLDER = "all_pages"
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
class
|
|
14
|
+
class StrEnum(str, Enum):
|
|
15
|
+
def __str__(self):
|
|
16
|
+
return self.value
|
|
17
|
+
|
|
18
|
+
class AgentType(StrEnum):
|
|
15
19
|
PROCESSING_WORKER = "worker"
|
|
16
20
|
PROCESSOR_SERVER = "server"
|
|
17
21
|
|
|
18
22
|
|
|
19
|
-
class DeployType(
|
|
23
|
+
class DeployType(StrEnum):
|
|
20
24
|
# Deployed by the Processing Server config file
|
|
21
25
|
DOCKER = "docker"
|
|
22
26
|
NATIVE = "native"
|
|
@@ -26,7 +30,7 @@ class DeployType(str, Enum):
|
|
|
26
30
|
|
|
27
31
|
|
|
28
32
|
# TODO: Make the states uppercase
|
|
29
|
-
class JobState(
|
|
33
|
+
class JobState(StrEnum):
|
|
30
34
|
# The processing job is cached inside the Processing Server requests cache
|
|
31
35
|
cached = "CACHED"
|
|
32
36
|
# The processing job was cancelled due to failed dependencies
|
|
@@ -43,7 +47,7 @@ class JobState(str, Enum):
|
|
|
43
47
|
unset = "UNSET"
|
|
44
48
|
|
|
45
49
|
|
|
46
|
-
class NetworkLoggingDirs(
|
|
50
|
+
class NetworkLoggingDirs(StrEnum):
|
|
47
51
|
METS_SERVERS = "mets_servers"
|
|
48
52
|
PROCESSING_JOBS = "processing_jobs"
|
|
49
53
|
PROCESSING_SERVERS = "processing_servers"
|
|
@@ -51,7 +55,7 @@ class NetworkLoggingDirs(str, Enum):
|
|
|
51
55
|
PROCESSOR_SERVERS = "processor_servers"
|
|
52
56
|
|
|
53
57
|
|
|
54
|
-
class ServerApiTags(
|
|
58
|
+
class ServerApiTags(StrEnum):
|
|
55
59
|
ADMIN = "admin"
|
|
56
60
|
DISCOVERY = "discovery"
|
|
57
61
|
PROCESSING = "processing"
|
|
@@ -9,11 +9,22 @@ def configure_file_handler_with_formatter(logger: Logger, log_file: Path, mode:
|
|
|
9
9
|
file_handler = FileHandler(filename=log_file, mode=mode)
|
|
10
10
|
file_handler.setFormatter(Formatter(LOG_FORMAT))
|
|
11
11
|
logger.addHandler(file_handler)
|
|
12
|
+
try:
|
|
13
|
+
log_file.chmod(0o666)
|
|
14
|
+
except PermissionError:
|
|
15
|
+
# if the file exists the permissions are supposed to already fit
|
|
16
|
+
pass
|
|
12
17
|
|
|
13
18
|
|
|
14
19
|
def get_root_logging_dir(module_name: NetworkLoggingDirs) -> Path:
|
|
15
20
|
module_log_dir = Path(config.OCRD_NETWORK_LOGS_ROOT_DIR, module_name.value)
|
|
16
|
-
|
|
21
|
+
try:
|
|
22
|
+
module_log_dir.mkdir(parents=True, exist_ok=True)
|
|
23
|
+
module_log_dir.chmod(0o777)
|
|
24
|
+
except PermissionError:
|
|
25
|
+
# if the folder exists the permissions are supposed to already fit
|
|
26
|
+
pass
|
|
27
|
+
|
|
17
28
|
return module_log_dir
|
|
18
29
|
|
|
19
30
|
|
|
@@ -277,13 +277,23 @@ config.add(name="OCRD_NETWORK_SOCKETS_ROOT_DIR",
|
|
|
277
277
|
description="The root directory where all mets server related socket files are created",
|
|
278
278
|
parser=lambda val: Path(val),
|
|
279
279
|
default=(True, Path(gettempdir(), "ocrd_network_sockets")))
|
|
280
|
-
config.OCRD_NETWORK_SOCKETS_ROOT_DIR.mkdir(parents=True, exist_ok=True)
|
|
280
|
+
config.OCRD_NETWORK_SOCKETS_ROOT_DIR.mkdir(mode=0o777, parents=True, exist_ok=True)
|
|
281
|
+
try:
|
|
282
|
+
config.OCRD_NETWORK_SOCKETS_ROOT_DIR.chmod(0o777)
|
|
283
|
+
except PermissionError:
|
|
284
|
+
# if the folder exists the permissions are supposed to already fit
|
|
285
|
+
pass
|
|
281
286
|
|
|
282
287
|
config.add(name="OCRD_NETWORK_LOGS_ROOT_DIR",
|
|
283
288
|
description="The root directory where all ocrd_network related file logs are stored",
|
|
284
289
|
parser=lambda val: Path(val),
|
|
285
290
|
default=(True, Path(gettempdir(), "ocrd_network_logs")))
|
|
286
|
-
config.OCRD_NETWORK_LOGS_ROOT_DIR.mkdir(parents=True, exist_ok=True)
|
|
291
|
+
config.OCRD_NETWORK_LOGS_ROOT_DIR.mkdir(mode=0o777, parents=True, exist_ok=True)
|
|
292
|
+
try:
|
|
293
|
+
config.OCRD_NETWORK_LOGS_ROOT_DIR.chmod(0o777)
|
|
294
|
+
except PermissionError:
|
|
295
|
+
# if the folder exists the permissions are supposed to already fit
|
|
296
|
+
pass
|
|
287
297
|
|
|
288
298
|
config.add("HOME",
|
|
289
299
|
description="Directory to look for `ocrd_logging.conf`, fallback for unset XDG variables.",
|
|
@@ -32,6 +32,7 @@ import logging
|
|
|
32
32
|
import logging.config
|
|
33
33
|
from pathlib import Path
|
|
34
34
|
import sys
|
|
35
|
+
from os import chmod
|
|
35
36
|
|
|
36
37
|
from .constants import LOG_FORMAT, LOG_TIMEFMT
|
|
37
38
|
from .config import config
|
|
@@ -166,6 +167,15 @@ def initLogging(builtin_only=False, force_reinit=False, silent=not config.OCRD_L
|
|
|
166
167
|
if not silent:
|
|
167
168
|
print(f"[LOGGING] Picked up logging config at {config_file}", file=sys.stderr)
|
|
168
169
|
logging.config.fileConfig(config_file)
|
|
170
|
+
# Set permission of processing-server logfile to 666 to prevent possible permission erros while logging
|
|
171
|
+
try:
|
|
172
|
+
network_logger = logging.getLogger("ocrd_network")
|
|
173
|
+
for handler in network_logger.handlers:
|
|
174
|
+
if isinstance(handler, logging.FileHandler):
|
|
175
|
+
chmod(handler.baseFilename, 0o666)
|
|
176
|
+
except PermissionError:
|
|
177
|
+
# if the file exists the permissions are supposed to already fit
|
|
178
|
+
pass
|
|
169
179
|
else:
|
|
170
180
|
if not silent:
|
|
171
181
|
print("[LOGGING] Initializing logging with built-in defaults", file=sys.stderr)
|
|
@@ -254,7 +254,7 @@ def guess_media_type(input_file : str, fallback : str = None, application_xml :
|
|
|
254
254
|
if mimetype is None:
|
|
255
255
|
mimetype = EXT_TO_MIME.get(''.join(Path(input_file).suffixes), fallback)
|
|
256
256
|
if mimetype is None:
|
|
257
|
-
raise ValueError("Could not determine MIME type of input_file
|
|
257
|
+
raise ValueError(f"Could not determine MIME type of {input_file}")
|
|
258
258
|
if mimetype == 'application/xml':
|
|
259
259
|
mimetype = application_xml
|
|
260
260
|
return mimetype
|
ocrd-3.3.2/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
3.3.2
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -474,11 +474,11 @@ def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field, incl
|
|
|
474
474
|
if wait:
|
|
475
475
|
time.sleep(wait)
|
|
476
476
|
if undo_download and f.url and f.local_filename:
|
|
477
|
-
f.local_filename = None
|
|
478
477
|
modified_mets = True
|
|
479
478
|
if not keep_files:
|
|
480
479
|
ctx.log.debug("rm %s [cwd=%s]", f.local_filename, workspace.directory)
|
|
481
480
|
unlink(f.local_filename)
|
|
481
|
+
f.local_filename = None
|
|
482
482
|
ret_entry = [f.ID if field == 'pageId' else str(getattr(f, field)) or '' for field in output_field]
|
|
483
483
|
ret.append(ret_entry)
|
|
484
484
|
if modified_mets:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|