ocrd 3.5.1__py3-none-any.whl → 3.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocrd/cli/__init__.py +8 -6
- ocrd/cli/bashlib.py +8 -114
- ocrd/cli/network.py +0 -2
- ocrd/cli/ocrd_tool.py +26 -4
- ocrd/cli/process.py +1 -0
- ocrd/cli/resmgr.py +0 -1
- ocrd/cli/validate.py +32 -13
- ocrd/cli/workspace.py +125 -52
- ocrd/cli/zip.py +13 -4
- ocrd/decorators/__init__.py +28 -52
- ocrd/decorators/loglevel_option.py +4 -0
- ocrd/decorators/mets_find_options.py +2 -1
- ocrd/decorators/ocrd_cli_options.py +3 -7
- ocrd/decorators/parameter_option.py +12 -11
- ocrd/mets_server.py +11 -15
- ocrd/processor/base.py +88 -71
- ocrd/processor/builtin/dummy_processor.py +7 -4
- ocrd/processor/builtin/filter_processor.py +3 -2
- ocrd/processor/helpers.py +5 -6
- ocrd/processor/ocrd_page_result.py +7 -5
- ocrd/resolver.py +42 -32
- ocrd/task_sequence.py +11 -4
- ocrd/workspace.py +64 -54
- ocrd/workspace_backup.py +3 -0
- ocrd/workspace_bagger.py +15 -8
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/METADATA +2 -8
- ocrd-3.7.0.dist-info/RECORD +123 -0
- ocrd_modelfactory/__init__.py +4 -2
- ocrd_models/constants.py +18 -1
- ocrd_models/ocrd_agent.py +1 -1
- ocrd_models/ocrd_exif.py +7 -3
- ocrd_models/ocrd_file.py +24 -19
- ocrd_models/ocrd_mets.py +90 -67
- ocrd_models/ocrd_page.py +17 -13
- ocrd_models/ocrd_xml_base.py +1 -0
- ocrd_models/report.py +2 -1
- ocrd_models/utils.py +4 -3
- ocrd_models/xpath_functions.py +3 -1
- ocrd_network/__init__.py +1 -2
- ocrd_network/cli/__init__.py +0 -2
- ocrd_network/cli/client.py +122 -50
- ocrd_network/cli/processing_server.py +1 -2
- ocrd_network/client.py +2 -2
- ocrd_network/client_utils.py +30 -13
- ocrd_network/constants.py +1 -6
- ocrd_network/database.py +3 -3
- ocrd_network/logging_utils.py +2 -7
- ocrd_network/models/__init__.py +0 -2
- ocrd_network/models/job.py +31 -33
- ocrd_network/models/messages.py +3 -2
- ocrd_network/models/workspace.py +5 -5
- ocrd_network/process_helpers.py +54 -17
- ocrd_network/processing_server.py +63 -114
- ocrd_network/processing_worker.py +6 -5
- ocrd_network/rabbitmq_utils/__init__.py +2 -0
- ocrd_network/rabbitmq_utils/helpers.py +24 -7
- ocrd_network/runtime_data/__init__.py +1 -2
- ocrd_network/runtime_data/deployer.py +12 -85
- ocrd_network/runtime_data/hosts.py +61 -130
- ocrd_network/runtime_data/network_agents.py +7 -31
- ocrd_network/runtime_data/network_services.py +1 -1
- ocrd_network/server_cache.py +1 -1
- ocrd_network/server_utils.py +13 -52
- ocrd_network/utils.py +1 -0
- ocrd_utils/__init__.py +4 -4
- ocrd_utils/config.py +86 -76
- ocrd_utils/deprecate.py +3 -0
- ocrd_utils/image.py +51 -23
- ocrd_utils/introspect.py +8 -3
- ocrd_utils/logging.py +15 -7
- ocrd_utils/os.py +17 -4
- ocrd_utils/str.py +32 -16
- ocrd_validators/json_validator.py +4 -1
- ocrd_validators/ocrd_tool_validator.py +2 -1
- ocrd_validators/ocrd_zip_validator.py +5 -4
- ocrd_validators/page_validator.py +21 -9
- ocrd_validators/parameter_validator.py +3 -2
- ocrd_validators/processing_server_config.schema.yml +1 -33
- ocrd_validators/resource_list_validator.py +3 -1
- ocrd_validators/workspace_validator.py +30 -20
- ocrd_validators/xsd_mets_validator.py +2 -1
- ocrd_validators/xsd_page_validator.py +2 -1
- ocrd_validators/xsd_validator.py +4 -2
- ocrd/cli/log.py +0 -51
- ocrd/lib.bash +0 -317
- ocrd-3.5.1.dist-info/RECORD +0 -128
- ocrd_network/cli/processor_server.py +0 -31
- ocrd_network/models/ocrd_tool.py +0 -12
- ocrd_network/processor_server.py +0 -255
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/LICENSE +0 -0
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/WHEEL +0 -0
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/entry_points.txt +0 -0
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/top_level.txt +0 -0
ocrd_network/processor_server.py
DELETED
|
@@ -1,255 +0,0 @@
|
|
|
1
|
-
from datetime import datetime
|
|
2
|
-
from os import getpid
|
|
3
|
-
from subprocess import run as subprocess_run, PIPE
|
|
4
|
-
from uvicorn import run
|
|
5
|
-
|
|
6
|
-
from fastapi import APIRouter, BackgroundTasks, FastAPI, status
|
|
7
|
-
from fastapi.responses import FileResponse
|
|
8
|
-
|
|
9
|
-
from ocrd_utils import (
|
|
10
|
-
initLogging,
|
|
11
|
-
get_ocrd_tool_json,
|
|
12
|
-
getLogger,
|
|
13
|
-
parse_json_string_with_comments
|
|
14
|
-
)
|
|
15
|
-
from .constants import JobState, ServerApiTags
|
|
16
|
-
from .database import (
|
|
17
|
-
DBProcessorJob,
|
|
18
|
-
db_get_workspace,
|
|
19
|
-
db_update_processing_job,
|
|
20
|
-
db_get_processing_job,
|
|
21
|
-
initiate_database
|
|
22
|
-
)
|
|
23
|
-
from .logging_utils import (
|
|
24
|
-
configure_file_handler_with_formatter,
|
|
25
|
-
get_processor_server_logging_file_path,
|
|
26
|
-
get_processing_job_logging_file_path
|
|
27
|
-
)
|
|
28
|
-
from .models import PYJobInput, PYJobOutput, PYOcrdTool
|
|
29
|
-
from .process_helpers import invoke_processor
|
|
30
|
-
from .rabbitmq_utils import OcrdResultMessage
|
|
31
|
-
from .server_utils import (
|
|
32
|
-
_get_processor_job,
|
|
33
|
-
_get_processor_job_log,
|
|
34
|
-
raise_http_exception,
|
|
35
|
-
validate_and_return_mets_path,
|
|
36
|
-
validate_job_input
|
|
37
|
-
)
|
|
38
|
-
from .utils import calculate_execution_time, post_to_callback_url, generate_id
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
class ProcessorServer(FastAPI):
|
|
42
|
-
def __init__(self, mongodb_addr: str, processor_name: str = "", processor_class=None):
    """
    Set up the FastAPI application that serves a single processor.

    Args:
        mongodb_addr: database URL; stored as ``db_url`` and passed to
            ``initiate_database`` by the startup hook.
        processor_name: name of the processor executable. It is used for the
            log file path and is invoked through ``get_version`` /
            ``get_ocrd_tool`` while the server is constructed.
        processor_class: optional processor class, later forwarded to
            ``invoke_processor`` for every job.

    Raises:
        ValueError: if neither ``processor_name`` nor ``processor_class`` is given.
        Exception: if no ocrd-tool description could be obtained.
    """
    if not (processor_name or processor_class):
        raise ValueError("Either 'processor_name' or 'processor_class' must be provided")
    super().__init__(
        on_startup=[self.on_startup],
        on_shutdown=[self.on_shutdown],
        title="Network agent - Processor Server",
        description="Network agent - Processor Server"
    )
    initLogging()
    self.log = getLogger("ocrd_network.processor_server")
    log_file = get_processor_server_logging_file_path(processor_name=processor_name, pid=getpid())
    configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a")

    self.db_url = mongodb_addr
    self.processor_name = processor_name
    self.processor_class = processor_class
    # get_version() and get_ocrd_tool() first look at these attributes,
    # so they have to exist before the two calls below.
    self.ocrd_tool = None
    self.version = None

    self.version = self.get_version()
    self.ocrd_tool = self.get_ocrd_tool()

    if not self.ocrd_tool:
        raise Exception("The ocrd_tool is empty or missing")

    if not self.processor_name:
        self.processor_name = self.ocrd_tool["executable"]

    self.add_api_routes_processing()
    # Report the resolved name: the ``processor_name`` argument may be empty
    # when the name was taken from the ocrd-tool description above.
    self.log.info(f"Initialized processor server: {self.processor_name}")
|
|
73
|
-
|
|
74
|
-
async def on_startup(self):
    """
    Startup hook: initialise the database layer with the stored database URL.
    """
    database_url = self.db_url
    await initiate_database(db_url=database_url)
|
|
76
|
-
|
|
77
|
-
async def on_shutdown(self) -> None:
    """
    Shutdown hook; currently does nothing.

    TODO: Perform graceful shutdown operations here
    """
    return None
|
|
82
|
-
|
|
83
|
-
def add_api_routes_processing(self):
    """
    Register the processing endpoints of this server.

    Routes:
        GET  /info          -> ``get_processor_info``
        POST /run           -> ``create_processor_task``
        GET  /job/{job_id}  -> ``get_processor_job``
        GET  /log/{job_id}  -> ``get_processor_job_log``

    The routes are collected on a local ``APIRouter`` which is then attached
    to the application itself.
    """
    processing_router = APIRouter()
    processing_router.add_api_route(
        path="/info",
        endpoint=self.get_processor_info,
        methods=["GET"],
        tags=[ServerApiTags.PROCESSING],
        status_code=status.HTTP_200_OK,
        summary="Get information about this processor.",
        response_model=PYOcrdTool,
        response_model_exclude_unset=True,
        response_model_exclude_none=True
    )
    processing_router.add_api_route(
        path="/run",
        endpoint=self.create_processor_task,
        methods=["POST"],
        tags=[ServerApiTags.PROCESSING],
        status_code=status.HTTP_202_ACCEPTED,
        summary="Submit a job to this processor.",
        response_model=PYJobOutput,
        response_model_exclude_unset=True,
        response_model_exclude_none=True
    )
    processing_router.add_api_route(
        path="/job/{job_id}",
        endpoint=self.get_processor_job,
        methods=["GET"],
        tags=[ServerApiTags.PROCESSING],
        status_code=status.HTTP_200_OK,
        summary="Get information about a job based on its ID",
        response_model=PYJobOutput,
        response_model_exclude_unset=True,
        response_model_exclude_none=True
    )
    processing_router.add_api_route(
        path="/log/{job_id}",
        endpoint=self.get_processor_job_log,
        methods=["GET"],
        tags=[ServerApiTags.PROCESSING],
        status_code=status.HTTP_200_OK,
        summary="Get the log file of a job id"
    )
    # A router that is never included is simply discarded at the end of this
    # method; attach it so the endpoints above are actually served by the app.
    self.include_router(processing_router)
|
|
126
|
-
|
|
127
|
-
async def get_processor_info(self):
    """
    Return the ocrd-tool description held by this server.

    When the description is empty or missing, ``raise_http_exception`` is
    called with HTTP status 500.
    """
    tool_description = self.ocrd_tool
    if tool_description:
        return tool_description
    raise_http_exception(
        self.log,
        status.HTTP_500_INTERNAL_SERVER_ERROR,
        "Empty or missing ocrd tool json."
    )
    # Presumably not reached (the helper is expected to raise); kept so the
    # fall-through result is unchanged.
    return self.ocrd_tool
|
|
132
|
-
|
|
133
|
-
# Note: The Processing server pushes to a queue, while
|
|
134
|
-
# the Processor Server creates (pushes to) a background task
|
|
135
|
-
async def create_processor_task(self, job_input: PYJobInput, background_tasks: BackgroundTasks):
    """
    Accept a job request and schedule its execution as a background task.

    The input is validated first and its ``path_to_mets`` is replaced by the
    value returned from ``validate_and_return_mets_path``. A request that
    carries no ``job_id`` gets a freshly generated one and a new database
    entry in state ``queued``; otherwise the job is loaded from the database
    by its id.

    Returns:
        The result of ``job.to_job_output()`` for the scheduled job.
    """
    validate_job_input(self.log, self.processor_name, self.ocrd_tool, job_input)
    job_input.path_to_mets = await validate_and_return_mets_path(self.log, job_input)

    if job_input.job_id:
        # An id is already present (presumably the request was forwarded by
        # the Processing Server), so reuse the stored job.
        job = await db_get_processing_job(job_input.job_id)
    else:
        # The request is not forwarded from the Processing Server, assign a job_id
        fresh_job_id = generate_id()
        submitted_fields = job_input.dict(exclude_unset=True, exclude_none=True)
        # Create a DB entry
        job = DBProcessorJob(
            **submitted_fields,
            job_id=fresh_job_id,
            processor_name=self.processor_name,
            state=JobState.queued
        )
        await job.insert()

    background_tasks.add_task(self.run_processor_task, job)
    return job.to_job_output()
|
|
155
|
-
|
|
156
|
-
async def run_processor_task(self, job: DBProcessorJob):
    """
    Execute one processing job and record its outcome.

    Steps:
        1. Mark the job as ``running`` in the database, together with its
           start time and log file path.
        2. Look up the workspace of the job's METS file and invoke the processor.
        3. Store the final state (``success`` or ``failed``), the end time
           and the execution time.
        4. Post a result message to ``callback_url`` and
           ``internal_callback_url`` when they are set on the job.

    Any exception raised in step 2 is logged and turns the job into a
    ``failed`` one; it is not propagated.
    """
    start_time = datetime.now()
    job_log_file = get_processing_job_logging_file_path(job_id=job.job_id)
    await db_update_processing_job(
        job_id=job.job_id,
        state=JobState.running,
        start_time=start_time,
        log_file_path=job_log_file
    )

    try:
        # The workspace lookup lives inside the try block on purpose: if it
        # fails, the job must still reach a terminal state and trigger the
        # callbacks instead of staying in ``running``.
        workspace = await db_get_workspace(workspace_mets_path=job.path_to_mets)
        invoke_processor(
            processor_class=self.processor_class,
            executable=self.processor_name,
            abs_path_to_mets=job.path_to_mets,
            input_file_grps=job.input_file_grps,
            output_file_grps=job.output_file_grps,
            page_id=job.page_id,
            parameters=job.parameters,
            mets_server_url=workspace.mets_server_url,
            log_filename=job_log_file,
        )
    except Exception as error:
        self.log.debug(f"processor_name: {self.processor_name}, path_to_mets: {job.path_to_mets}, "
                       f"input_grps: {job.input_file_grps}, output_file_grps: {job.output_file_grps}, "
                       f"page_id: {job.page_id}, parameters: {job.parameters}")
        self.log.exception(error)
        job_state = JobState.failed
    else:
        job_state = JobState.success

    end_time = datetime.now()
    exec_duration = calculate_execution_time(start_time, end_time)
    await db_update_processing_job(
        job_id=job.job_id,
        state=job_state,
        end_time=end_time,
        exec_time=f"{exec_duration} ms"
    )
    result_message = OcrdResultMessage(
        job_id=job.job_id,
        state=job_state.value,
        path_to_mets=job.path_to_mets,
        # May not be always available
        workspace_id=job.workspace_id if job.workspace_id else ''
    )
    self.log.info(f"Result message: {result_message}")
    if job.callback_url:
        # If the callback_url field is set,
        # post the result message (callback to a user defined endpoint)
        post_to_callback_url(self.log, job.callback_url, result_message)
    if job.internal_callback_url:
        # If the internal callback_url field is set,
        # post the result message (callback to Processing Server endpoint)
        post_to_callback_url(self.log, job.internal_callback_url, result_message)
|
|
211
|
-
|
|
212
|
-
def get_ocrd_tool(self):
    """
    Return the ocrd-tool description of the processor.

    An already stored ``self.ocrd_tool`` is returned as is. Otherwise, when a
    processor class is set, the executable is run with ``--dump-json`` and
    its standard output is parsed; without a processor class the description
    is obtained from ``get_ocrd_tool_json``.
    """
    stored_tool = self.ocrd_tool
    if stored_tool:
        return stored_tool
    if not self.processor_class:
        return get_ocrd_tool_json(self.processor_name)
    # Reading ``ocrd_tool`` from an instance of the processor class was noted
    # as possibly problematic, hence the subprocess call.
    dump = subprocess_run(
        [self.processor_name, "--dump-json"],
        stdout=PIPE,
        check=True,
        universal_newlines=True
    )
    return parse_json_string_with_comments(dump.stdout)
|
|
229
|
-
|
|
230
|
-
def get_version(self) -> str:
    """
    Return the version string of the processor.

    An already stored ``self.version`` is returned as is; otherwise the
    processor executable is run with ``--version`` and its standard output is
    returned unmodified.

    Raises:
        subprocess.CalledProcessError: if the executable exits with a
            non-zero status (the call uses ``check=True``).
    """
    known_version = self.version
    if known_version:
        return known_version
    # Reading the version from an instance of the processor class was noted
    # as possibly problematic, hence the subprocess call.
    completed = subprocess_run(
        [self.processor_name, "--version"],
        stdout=PIPE,
        check=True,
        universal_newlines=True
    )
    return completed.stdout
|
|
247
|
-
|
|
248
|
-
def run_server(self, host, port):
    """
    Hand this application to uvicorn's ``run`` with the given host and port.
    """
    listen_on = {"host": host, "port": port}
    run(self, **listen_on)
|
|
250
|
-
|
|
251
|
-
async def get_processor_job(self, job_id: str) -> PYJobOutput:
    """
    Endpoint handler: delegate the lookup of ``job_id`` to the
    ``_get_processor_job`` helper, passing this server's logger.
    """
    job_output = await _get_processor_job(self.log, job_id)
    return job_output
|
|
253
|
-
|
|
254
|
-
async def get_processor_job_log(self, job_id: str) -> FileResponse:
    """
    Endpoint handler: delegate fetching the log of ``job_id`` to the
    ``_get_processor_job_log`` helper, passing this server's logger.
    """
    log_response = await _get_processor_job_log(self.log, job_id)
    return log_response
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|