ocrd 3.5.1__py3-none-any.whl → 3.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. ocrd/cli/__init__.py +8 -6
  2. ocrd/cli/bashlib.py +8 -114
  3. ocrd/cli/network.py +0 -2
  4. ocrd/cli/ocrd_tool.py +26 -4
  5. ocrd/cli/process.py +1 -0
  6. ocrd/cli/resmgr.py +0 -1
  7. ocrd/cli/validate.py +32 -13
  8. ocrd/cli/workspace.py +125 -52
  9. ocrd/cli/zip.py +13 -4
  10. ocrd/decorators/__init__.py +28 -52
  11. ocrd/decorators/loglevel_option.py +4 -0
  12. ocrd/decorators/mets_find_options.py +2 -1
  13. ocrd/decorators/ocrd_cli_options.py +3 -7
  14. ocrd/decorators/parameter_option.py +12 -11
  15. ocrd/mets_server.py +11 -15
  16. ocrd/processor/base.py +88 -71
  17. ocrd/processor/builtin/dummy_processor.py +7 -4
  18. ocrd/processor/builtin/filter_processor.py +3 -2
  19. ocrd/processor/helpers.py +5 -6
  20. ocrd/processor/ocrd_page_result.py +7 -5
  21. ocrd/resolver.py +42 -32
  22. ocrd/task_sequence.py +11 -4
  23. ocrd/workspace.py +64 -54
  24. ocrd/workspace_backup.py +3 -0
  25. ocrd/workspace_bagger.py +15 -8
  26. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/METADATA +2 -8
  27. ocrd-3.7.0.dist-info/RECORD +123 -0
  28. ocrd_modelfactory/__init__.py +4 -2
  29. ocrd_models/constants.py +18 -1
  30. ocrd_models/ocrd_agent.py +1 -1
  31. ocrd_models/ocrd_exif.py +7 -3
  32. ocrd_models/ocrd_file.py +24 -19
  33. ocrd_models/ocrd_mets.py +90 -67
  34. ocrd_models/ocrd_page.py +17 -13
  35. ocrd_models/ocrd_xml_base.py +1 -0
  36. ocrd_models/report.py +2 -1
  37. ocrd_models/utils.py +4 -3
  38. ocrd_models/xpath_functions.py +3 -1
  39. ocrd_network/__init__.py +1 -2
  40. ocrd_network/cli/__init__.py +0 -2
  41. ocrd_network/cli/client.py +122 -50
  42. ocrd_network/cli/processing_server.py +1 -2
  43. ocrd_network/client.py +2 -2
  44. ocrd_network/client_utils.py +30 -13
  45. ocrd_network/constants.py +1 -6
  46. ocrd_network/database.py +3 -3
  47. ocrd_network/logging_utils.py +2 -7
  48. ocrd_network/models/__init__.py +0 -2
  49. ocrd_network/models/job.py +31 -33
  50. ocrd_network/models/messages.py +3 -2
  51. ocrd_network/models/workspace.py +5 -5
  52. ocrd_network/process_helpers.py +54 -17
  53. ocrd_network/processing_server.py +63 -114
  54. ocrd_network/processing_worker.py +6 -5
  55. ocrd_network/rabbitmq_utils/__init__.py +2 -0
  56. ocrd_network/rabbitmq_utils/helpers.py +24 -7
  57. ocrd_network/runtime_data/__init__.py +1 -2
  58. ocrd_network/runtime_data/deployer.py +12 -85
  59. ocrd_network/runtime_data/hosts.py +61 -130
  60. ocrd_network/runtime_data/network_agents.py +7 -31
  61. ocrd_network/runtime_data/network_services.py +1 -1
  62. ocrd_network/server_cache.py +1 -1
  63. ocrd_network/server_utils.py +13 -52
  64. ocrd_network/utils.py +1 -0
  65. ocrd_utils/__init__.py +4 -4
  66. ocrd_utils/config.py +86 -76
  67. ocrd_utils/deprecate.py +3 -0
  68. ocrd_utils/image.py +51 -23
  69. ocrd_utils/introspect.py +8 -3
  70. ocrd_utils/logging.py +15 -7
  71. ocrd_utils/os.py +17 -4
  72. ocrd_utils/str.py +32 -16
  73. ocrd_validators/json_validator.py +4 -1
  74. ocrd_validators/ocrd_tool_validator.py +2 -1
  75. ocrd_validators/ocrd_zip_validator.py +5 -4
  76. ocrd_validators/page_validator.py +21 -9
  77. ocrd_validators/parameter_validator.py +3 -2
  78. ocrd_validators/processing_server_config.schema.yml +1 -33
  79. ocrd_validators/resource_list_validator.py +3 -1
  80. ocrd_validators/workspace_validator.py +30 -20
  81. ocrd_validators/xsd_mets_validator.py +2 -1
  82. ocrd_validators/xsd_page_validator.py +2 -1
  83. ocrd_validators/xsd_validator.py +4 -2
  84. ocrd/cli/log.py +0 -51
  85. ocrd/lib.bash +0 -317
  86. ocrd-3.5.1.dist-info/RECORD +0 -128
  87. ocrd_network/cli/processor_server.py +0 -31
  88. ocrd_network/models/ocrd_tool.py +0 -12
  89. ocrd_network/processor_server.py +0 -255
  90. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/LICENSE +0 -0
  91. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/WHEEL +0 -0
  92. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/entry_points.txt +0 -0
  93. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/top_level.txt +0 -0
@@ -1,255 +0,0 @@
1
- from datetime import datetime
2
- from os import getpid
3
- from subprocess import run as subprocess_run, PIPE
4
- from uvicorn import run
5
-
6
- from fastapi import APIRouter, BackgroundTasks, FastAPI, status
7
- from fastapi.responses import FileResponse
8
-
9
- from ocrd_utils import (
10
- initLogging,
11
- get_ocrd_tool_json,
12
- getLogger,
13
- parse_json_string_with_comments
14
- )
15
- from .constants import JobState, ServerApiTags
16
- from .database import (
17
- DBProcessorJob,
18
- db_get_workspace,
19
- db_update_processing_job,
20
- db_get_processing_job,
21
- initiate_database
22
- )
23
- from .logging_utils import (
24
- configure_file_handler_with_formatter,
25
- get_processor_server_logging_file_path,
26
- get_processing_job_logging_file_path
27
- )
28
- from .models import PYJobInput, PYJobOutput, PYOcrdTool
29
- from .process_helpers import invoke_processor
30
- from .rabbitmq_utils import OcrdResultMessage
31
- from .server_utils import (
32
- _get_processor_job,
33
- _get_processor_job_log,
34
- raise_http_exception,
35
- validate_and_return_mets_path,
36
- validate_job_input
37
- )
38
- from .utils import calculate_execution_time, post_to_callback_url, generate_id
39
-
40
-
41
- class ProcessorServer(FastAPI):
42
- def __init__(self, mongodb_addr: str, processor_name: str = "", processor_class=None):
43
- if not (processor_name or processor_class):
44
- raise ValueError("Either 'processor_name' or 'processor_class' must be provided")
45
- super().__init__(
46
- on_startup=[self.on_startup],
47
- on_shutdown=[self.on_shutdown],
48
- title=f"Network agent - Processor Server",
49
- description="Network agent - Processor Server"
50
- )
51
- initLogging()
52
- self.log = getLogger("ocrd_network.processor_server")
53
- log_file = get_processor_server_logging_file_path(processor_name=processor_name, pid=getpid())
54
- configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a")
55
-
56
- self.db_url = mongodb_addr
57
- self.processor_name = processor_name
58
- self.processor_class = processor_class
59
- self.ocrd_tool = None
60
- self.version = None
61
-
62
- self.version = self.get_version()
63
- self.ocrd_tool = self.get_ocrd_tool()
64
-
65
- if not self.ocrd_tool:
66
- raise Exception(f"The ocrd_tool is empty or missing")
67
-
68
- if not self.processor_name:
69
- self.processor_name = self.ocrd_tool["executable"]
70
-
71
- self.add_api_routes_processing()
72
- self.log.info(f"Initialized processor server: {processor_name}")
73
-
74
- async def on_startup(self):
75
- await initiate_database(db_url=self.db_url)
76
-
77
- async def on_shutdown(self) -> None:
78
- """
79
- TODO: Perform graceful shutdown operations here
80
- """
81
- pass
82
-
83
- def add_api_routes_processing(self):
84
- processing_router = APIRouter()
85
- processing_router.add_api_route(
86
- path="/info",
87
- endpoint=self.get_processor_info,
88
- methods=["GET"],
89
- tags=[ServerApiTags.PROCESSING],
90
- status_code=status.HTTP_200_OK,
91
- summary="Get information about this processor.",
92
- response_model=PYOcrdTool,
93
- response_model_exclude_unset=True,
94
- response_model_exclude_none=True
95
- )
96
- processing_router.add_api_route(
97
- path="/run",
98
- endpoint=self.create_processor_task,
99
- methods=["POST"],
100
- tags=[ServerApiTags.PROCESSING],
101
- status_code=status.HTTP_202_ACCEPTED,
102
- summary="Submit a job to this processor.",
103
- response_model=PYJobOutput,
104
- response_model_exclude_unset=True,
105
- response_model_exclude_none=True
106
- )
107
- processing_router.add_api_route(
108
- path="/job/{job_id}",
109
- endpoint=self.get_processor_job,
110
- methods=["GET"],
111
- tags=[ServerApiTags.PROCESSING],
112
- status_code=status.HTTP_200_OK,
113
- summary="Get information about a job based on its ID",
114
- response_model=PYJobOutput,
115
- response_model_exclude_unset=True,
116
- response_model_exclude_none=True
117
- )
118
- processing_router.add_api_route(
119
- path="/log/{job_id}",
120
- endpoint=self.get_processor_job_log,
121
- methods=["GET"],
122
- tags=[ServerApiTags.PROCESSING],
123
- status_code=status.HTTP_200_OK,
124
- summary="Get the log file of a job id"
125
- )
126
-
127
- async def get_processor_info(self):
128
- if not self.ocrd_tool:
129
- message = "Empty or missing ocrd tool json."
130
- raise_http_exception(self.log, status.HTTP_500_INTERNAL_SERVER_ERROR, message)
131
- return self.ocrd_tool
132
-
133
- # Note: The Processing server pushes to a queue, while
134
- # the Processor Server creates (pushes to) a background task
135
- async def create_processor_task(self, job_input: PYJobInput, background_tasks: BackgroundTasks):
136
- validate_job_input(self.log, self.processor_name, self.ocrd_tool, job_input)
137
- job_input.path_to_mets = await validate_and_return_mets_path(self.log, job_input)
138
-
139
- # The request is not forwarded from the Processing Server, assign a job_id
140
- if not job_input.job_id:
141
- job_id = generate_id()
142
- # Create a DB entry
143
- job = DBProcessorJob(
144
- **job_input.dict(exclude_unset=True, exclude_none=True),
145
- job_id=job_id,
146
- processor_name=self.processor_name,
147
- state=JobState.queued
148
- )
149
- await job.insert()
150
- else:
151
- job = await db_get_processing_job(job_input.job_id)
152
- # await self.run_processor_task(job=job)
153
- background_tasks.add_task(self.run_processor_task, job)
154
- return job.to_job_output()
155
-
156
- async def run_processor_task(self, job: DBProcessorJob):
157
- execution_failed = False
158
- start_time = datetime.now()
159
- job_log_file = get_processing_job_logging_file_path(job_id=job.job_id)
160
- await db_update_processing_job(
161
- job_id=job.job_id,
162
- state=JobState.running,
163
- start_time=start_time,
164
- log_file_path=job_log_file
165
- )
166
-
167
- mets_server_url = (await db_get_workspace(workspace_mets_path=job.path_to_mets)).mets_server_url
168
- try:
169
- invoke_processor(
170
- processor_class=self.processor_class,
171
- executable=self.processor_name,
172
- abs_path_to_mets=job.path_to_mets,
173
- input_file_grps=job.input_file_grps,
174
- output_file_grps=job.output_file_grps,
175
- page_id=job.page_id,
176
- parameters=job.parameters,
177
- mets_server_url=mets_server_url,
178
- log_filename=job_log_file,
179
- )
180
- except Exception as error:
181
- self.log.debug(f"processor_name: {self.processor_name}, path_to_mets: {job.path_to_mets}, "
182
- f"input_grps: {job.input_file_grps}, output_file_grps: {job.output_file_grps}, "
183
- f"page_id: {job.page_id}, parameters: {job.parameters}")
184
- self.log.exception(error)
185
- execution_failed = True
186
- end_time = datetime.now()
187
- exec_duration = calculate_execution_time(start_time, end_time)
188
- job_state = JobState.success if not execution_failed else JobState.failed
189
- await db_update_processing_job(
190
- job_id=job.job_id,
191
- state=job_state,
192
- end_time=end_time,
193
- exec_time=f"{exec_duration} ms"
194
- )
195
- result_message = OcrdResultMessage(
196
- job_id=job.job_id,
197
- state=job_state.value,
198
- path_to_mets=job.path_to_mets,
199
- # May not be always available
200
- workspace_id=job.workspace_id if job.workspace_id else ''
201
- )
202
- self.log.info(f"Result message: {result_message}")
203
- if job.callback_url:
204
- # If the callback_url field is set,
205
- # post the result message (callback to a user defined endpoint)
206
- post_to_callback_url(self.log, job.callback_url, result_message)
207
- if job.internal_callback_url:
208
- # If the internal callback_url field is set,
209
- # post the result message (callback to Processing Server endpoint)
210
- post_to_callback_url(self.log, job.internal_callback_url, result_message)
211
-
212
- def get_ocrd_tool(self):
213
- if self.ocrd_tool:
214
- return self.ocrd_tool
215
- if self.processor_class:
216
- # The way of accessing ocrd tool like in the line below may be problematic
217
- # ocrd_tool = self.processor_class(workspace=None, version=True).ocrd_tool
218
- ocrd_tool = parse_json_string_with_comments(
219
- subprocess_run(
220
- [self.processor_name, "--dump-json"],
221
- stdout=PIPE,
222
- check=True,
223
- universal_newlines=True
224
- ).stdout
225
- )
226
- else:
227
- ocrd_tool = get_ocrd_tool_json(self.processor_name)
228
- return ocrd_tool
229
-
230
- def get_version(self) -> str:
231
- if self.version:
232
- return self.version
233
-
234
- """
235
- if self.processor_class:
236
- # The way of accessing the version like in the line below may be problematic
237
- # version_str = self.processor_class(workspace=None, version=True).version
238
- return version_str
239
- """
240
- version_str = subprocess_run(
241
- [self.processor_name, "--version"],
242
- stdout=PIPE,
243
- check=True,
244
- universal_newlines=True
245
- ).stdout
246
- return version_str
247
-
248
- def run_server(self, host, port):
249
- run(self, host=host, port=port)
250
-
251
- async def get_processor_job(self, job_id: str) -> PYJobOutput:
252
- return await _get_processor_job(self.log, job_id)
253
-
254
- async def get_processor_job_log(self, job_id: str) -> FileResponse:
255
- return await _get_processor_job_log(self.log, job_id)
File without changes
File without changes