ocrd 3.6.0__py3-none-any.whl → 3.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ocrd/cli/__init__.py CHANGED
@@ -32,12 +32,11 @@ from ..decorators import ocrd_loglevel
32
32
  from .ocrd_tool import ocrd_tool_cli
33
33
  from .workspace import workspace_cli
34
34
  from .process import process_cli
35
- from .bashlib import bashlib_cli
36
35
  from .validate import validate_cli
37
36
  from .resmgr import resmgr_cli
38
37
  from .zip import zip_cli
39
- from .log import log_cli
40
38
  from .network import network_cli
39
+ from .bashlib import bashlib_cli
41
40
 
42
41
 
43
42
  __all__ = ['cli']
@@ -117,9 +116,8 @@ def cli(**kwargs): # pylint: disable=unused-argument
117
116
  cli.add_command(ocrd_tool_cli)
118
117
  cli.add_command(workspace_cli)
119
118
  cli.add_command(process_cli)
120
- cli.add_command(bashlib_cli)
121
119
  cli.add_command(zip_cli)
122
120
  cli.add_command(validate_cli)
123
- cli.add_command(log_cli)
124
121
  cli.add_command(resmgr_cli)
125
122
  cli.add_command(network_cli)
123
+ cli.add_command(bashlib_cli)
ocrd/cli/bashlib.py CHANGED
@@ -6,24 +6,14 @@ OCR-D CLI: bash library
6
6
  :nested: full
7
7
 
8
8
  """
9
- from __future__ import print_function
10
- import sys
11
- import click
12
9
 
10
+ # WARNING: bashlib processors have been deprecated as of v3 of the OCR-D/core API
11
+ # and were removed in v3.7.0. We retain the `ocrd bashlib` CLI only
12
+ # to not break the `ocrd bashlib filename` command, which is used in CD
13
+ # scripts to get the `share` directory of the core installation.
14
+
15
+ import click
13
16
  from ocrd.constants import BASHLIB_FILENAME
14
- import ocrd.constants
15
- import ocrd_utils.constants
16
- from ocrd_utils.constants import DEFAULT_METS_BASENAME
17
- import ocrd_models.constants
18
- import ocrd_validators.constants
19
- from ocrd.decorators import (
20
- parameter_option,
21
- parameter_override_option,
22
- ocrd_loglevel,
23
- ocrd_cli_wrap_processor
24
- )
25
- from ocrd_utils import make_file_id
26
- from ocrd.processor import Processor
27
17
 
28
18
  # ----------------------------------------------------------------------
29
19
  # ocrd bashlib
@@ -50,104 +40,3 @@ def bashlib_filename():
50
40
  """
51
41
  print(BASHLIB_FILENAME)
52
42
 
53
-
54
- @bashlib_cli.command('constants')
55
- @click.argument('name')
56
- def bashlib_constants(name):
57
- """
58
- Query constants from ocrd_utils and ocrd_models
59
- """
60
- all_constants = {}
61
- for src in [ocrd.constants, ocrd_utils.constants, ocrd_models.constants, ocrd_validators.constants]:
62
- for k in src.__all__:
63
- all_constants[k] = src.__dict__[k]
64
- if name in ['*', 'KEYS', '__all__']:
65
- print(sorted(all_constants.keys()))
66
- sys.exit(0)
67
- if name not in all_constants:
68
- print("ERROR: name '%s' is not a known constant" % name, file=sys.stderr)
69
- sys.exit(1)
70
- val = all_constants[name]
71
- if isinstance(val, dict):
72
- # make this bash-friendly (show initialization for associative array)
73
- for key in val:
74
- print("[%s]=%s" % (key, val[key]), end=' ')
75
- else:
76
- print(val)
77
-
78
-
79
- @bashlib_cli.command('input-files')
80
- @click.option('--ocrd-tool', help="path to ocrd-tool.json of processor to feed", default=None)
81
- @click.option('--executable', help="name of processor executable in ocrd-tool.json", default=None)
82
- @click.option('-m', '--mets', help="METS to process", default=DEFAULT_METS_BASENAME)
83
- @click.option('-U', '--mets-server-url', help='TCP host URI or UDS path of METS server', default=None)
84
- @click.option('-d', '--working-dir', help="Working Directory")
85
- @click.option('-I', '--input-file-grp', help='File group(s) used as input.', default=None)
86
- @click.option('-O', '--output-file-grp', help='File group(s) used as output.', default=None)
87
- @click.option('-g', '--page-id', help="ID(s) of the pages to process")
88
- @click.option('--overwrite', is_flag=True, default=False, help="Remove output pages/images if they already exist\n"
89
- "(with '--page-id', remove only those).\n"
90
- "Short-hand for OCRD_EXISTING_OUTPUT=OVERWRITE")
91
- @click.option('--debug', is_flag=True, default=False, help="Abort on any errors with full stack trace.\n"
92
- "Short-hand for OCRD_MISSING_OUTPUT=ABORT")
93
- @parameter_option
94
- @parameter_override_option
95
- @ocrd_loglevel
96
- def bashlib_input_files(ocrd_tool, executable, **kwargs):
97
- """
98
- List input files for processing
99
-
100
- Instantiate a processor and workspace from the given processing options.
101
- Then loop through the input files of the input fileGrp, and for each one,
102
- print its `url`, `ID`, `mimetype` and `pageId`, as well as its recommended
103
- `outputFileId` (from ``make_file_id``).
104
-
105
- (The printing format is one associative array initializer per line.)
106
- """
107
- class BashlibProcessor(Processor):
108
- # go half way of the normal run_processor / process_workspace call tree
109
- # by just delegating to process_workspace, overriding process_page_file
110
- # to ensure all input files exist locally (without persisting them in the METS)
111
- # and print what needs to be acted on in bash-friendly way
112
- def process_page_file(self, *input_files):
113
- for field in ['url', 'local_filename', 'ID', 'mimetype', 'pageId']:
114
- # make this bash-friendly (show initialization for associative array)
115
- if len(input_files) > 1:
116
- # single quotes allow us to preserve the list value inside the alist
117
- value = ' '.join(str(getattr(res, field)) for res in input_files)
118
- else:
119
- value = str(getattr(input_files[0], field))
120
- print(f"[{field}]='{value}'", end=' ')
121
- output_file_id = make_file_id(input_files[0], kwargs['output_file_grp'])
122
- print(f"[outputFileId]='{output_file_id}'")
123
- if ocrd_tool and executable:
124
- class FullBashlibProcessor(BashlibProcessor):
125
- @property
126
- def metadata_location(self):
127
- # needed for metadata loading and validation mechanism
128
- return ocrd_tool
129
-
130
- @property
131
- def executable(self):
132
- # needed for ocrd_tool lookup
133
- return executable
134
- processor_class = FullBashlibProcessor
135
- else:
136
- # we have no true metadata file, so fill in just to make it work
137
- class UnknownBashlibProcessor(BashlibProcessor):
138
- @property
139
- def ocrd_tool(self):
140
- # needed to satisfy the validator
141
- return {'executable': '',
142
- # required now
143
- 'input_file_grp_cardinality': 1,
144
- 'output_file_grp_cardinality': 1,
145
- 'steps': ['']}
146
-
147
- @property
148
- def version(self):
149
- # needed to satisfy the validator and wrapper
150
- return '1.0'
151
- processor_class = UnknownBashlibProcessor
152
-
153
- ocrd_cli_wrap_processor(processor_class, **kwargs)
ocrd/mets_server.py CHANGED
@@ -258,12 +258,12 @@ class ClientSideOcrdMets:
258
258
 
259
259
  def add_agent(self, **kwargs):
260
260
  if not self.multiplexing_mode:
261
- return self.session.request("POST", f"{self.url}/agent", json=OcrdAgentModel.create(**kwargs).dict())
261
+ return self.session.request("POST", f"{self.url}/agent", json=OcrdAgentModel.create(**kwargs).model_dump())
262
262
  else:
263
263
  self.session.request(
264
264
  "POST",
265
265
  self.url,
266
- json=MpxReq.add_agent(self.ws_dir_path, OcrdAgentModel.create(**kwargs).dict())
266
+ json=MpxReq.add_agent(self.ws_dir_path, OcrdAgentModel.create(**kwargs).model_dump())
267
267
  ).json()
268
268
  return OcrdAgentModel.create(**kwargs)
269
269
 
@@ -305,7 +305,7 @@ class ClientSideOcrdMets:
305
305
  mimetype=mimetype, url=url, local_filename=local_filename
306
306
  )
307
307
  # add force+ignore
308
- kwargs = {**kwargs, **data.dict()}
308
+ kwargs = {**kwargs, **data.model_dump()}
309
309
 
310
310
  if not self.multiplexing_mode:
311
311
  r = self.session.request("POST", f"{self.url}/file", data=kwargs)
@@ -530,7 +530,7 @@ class OcrdMetsServer:
530
530
 
531
531
  @app.post(path='/agent', response_model=OcrdAgentModel)
532
532
  async def add_agent(agent: OcrdAgentModel):
533
- kwargs = agent.dict()
533
+ kwargs = agent.model_dump()
534
534
  kwargs['_type'] = kwargs.pop('type')
535
535
  workspace.mets.add_agent(**kwargs)
536
536
  response = agent
@@ -575,7 +575,7 @@ class OcrdMetsServer:
575
575
  local_filename=local_filename
576
576
  )
577
577
  # Add to workspace
578
- kwargs = file_resource.dict()
578
+ kwargs = file_resource.model_dump()
579
579
  workspace.add_file(**kwargs, force=force)
580
580
  response = file_resource
581
581
  self.log.debug(f"POST /file -> {response.__dict__}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocrd
3
- Version: 3.6.0
3
+ Version: 3.7.0
4
4
  Summary: OCR-D framework
5
5
  Author-email: Konstantin Baierer <unixprog@gmail.com>
6
6
  License: Apache License 2.0
@@ -35,7 +35,7 @@ Requires-Dist: opencv-python-headless
35
35
  Requires-Dist: paramiko
36
36
  Requires-Dist: pika>=1.2.0
37
37
  Requires-Dist: Pillow>=7.2.0
38
- Requires-Dist: pydantic==1.*
38
+ Requires-Dist: pydantic>=2.0.0
39
39
  Requires-Dist: python-magic
40
40
  Requires-Dist: python-multipart
41
41
  Requires-Dist: pyyaml
@@ -220,12 +220,6 @@ Also contains the command line tool `ocrd`.
220
220
 
221
221
  See [README for `ocrd`](./README_ocrd.md) for further information.
222
222
 
223
- ## bash library
224
-
225
- Builds a bash script that can be sourced by other bash scripts to create OCRD-compliant CLI.
226
-
227
- See [README for `bashlib`](./README_bashlib.md) for further information.
228
-
229
223
  ## Testing
230
224
 
231
225
  Download assets (`make assets`)
@@ -1,7 +1,6 @@
1
1
  ocrd/__init__.py,sha256=ZswMVmlqFhAEIzMR3my6IKPq9XLH21aDPC_m_8Jh4dA,1076
2
2
  ocrd/constants.py,sha256=6dn3mG54WqHsKInmLZp4kJjNqqPtBoFoSuLUuRbOps0,740
3
- ocrd/lib.bash,sha256=Eu-_Eb3uUNEoa3GmSDyUm_4FmmSJctglPOWz8EcBJ2c,10395
4
- ocrd/mets_server.py,sha256=eXIbSip6gYi5RyJZlriIrR9lUGpJjL0kXq3UDZPeqVs,22274
3
+ ocrd/mets_server.py,sha256=LbZ0U2_o0W7cWO639U7E816dXabro8-8yHGX0quvHn4,22304
5
4
  ocrd/ocrd-all-tool.json,sha256=EYXmMzP68p3KzL8nUZ16TCX2chQzKkAeISvuXqI_yIw,2094
6
5
  ocrd/resolver.py,sha256=7uwHRxaK8YMdKHe_a2dfrcNwL6UhQRJRVBrIX7GST7Q,15443
7
6
  ocrd/resource_list.yml,sha256=82-PiqkZnka1kTj3MQqNn4wXWKHHtoFchsQuetWuqFs,2633
@@ -10,9 +9,8 @@ ocrd/task_sequence.py,sha256=r4e4iaP9AXzTL2xQZpfYnHuFXty5pE-ym3gIyUz1aJc,7180
10
9
  ocrd/workspace.py,sha256=UL_gX0KA-MmpayBl9KGYTfcl-1Canj8S991G9RHhu70,65216
11
10
  ocrd/workspace_backup.py,sha256=aUOnYeJ-nWu-Zve27B0cYd9ZtBkmQX4F4Wim2UcrR5I,3624
12
11
  ocrd/workspace_bagger.py,sha256=4viSQoWteW0V4B_blB6asJXd4-qniGGJyCPfKnrsyrY,12054
13
- ocrd/cli/__init__.py,sha256=klgd85WNdLzqivbkEbt5cMOFvEoMoEE2KnT_oKAwTBs,2888
14
- ocrd/cli/bashlib.py,sha256=RLp6ejgbxLjGfNmeniVTuzLtrGeJrGJAZYc9JPT7U68,5998
15
- ocrd/cli/log.py,sha256=jRgxdoJS14OPXI9LpovPqB3JcxlcGZH7QOde6-TmykQ,1566
12
+ ocrd/cli/__init__.py,sha256=-n2jpGBZs_OMpI31E7CljGVdoFxDhgCAYwibcl_vp1Q,2838
13
+ ocrd/cli/bashlib.py,sha256=sEpTKbqM5DEo6838Ki5aFU8QsokA2SqQ841gcBu7M5M,1148
16
14
  ocrd/cli/network.py,sha256=iQ0AhQRGvIFyJY9RBArUiA_wuz7IfNKvU4L8KpVggnY,530
17
15
  ocrd/cli/ocrd_tool.py,sha256=kB3Y3tj7Fpz6Ts4KgVlznhXpAx8gCDvJTnO39j8SGL4,7679
18
16
  ocrd/cli/process.py,sha256=yfhBSYmuY5k2AccKwiNvG9hCDx1coYyWjq9BBwYaL3Y,1234
@@ -67,10 +65,10 @@ ocrd_network/cli/client.py,sha256=H5fiJhBqbFn4_B2p3V20GejGTIYO-mNglh3y5nzUGhs,10
67
65
  ocrd_network/cli/processing_server.py,sha256=NsuI0f9h4KDwe39YugmHo5cJ_29chcLLQ7DThKfPO7s,770
68
66
  ocrd_network/cli/processing_worker.py,sha256=ZuaCkbKV_WKJV7cGOjZ6RLrjjppymnwNCiznFMlclAg,1897
69
67
  ocrd_network/models/__init__.py,sha256=eVYMZaktzlyHKx-zI7GLYyRlZd3Vi_lNgsqSSFwqb6U,475
70
- ocrd_network/models/job.py,sha256=6NxcNIUHMS9Ft5UGWegngB6uNUOnE_6nOnRGKFbjI6A,4243
71
- ocrd_network/models/messages.py,sha256=XnyLMX77NchgmtKJRtqtBFsk_sCR4OGEuWm_d3uDkj8,657
68
+ ocrd_network/models/job.py,sha256=9bwp8DFoRH96WnRpkDV3XRfXCBiupzK6WXjqPsTcvLg,4440
69
+ ocrd_network/models/messages.py,sha256=OUDTjUiaATStsSAHCEDilUhBSruPsjpBtIBsllqN2Z0,672
72
70
  ocrd_network/models/workflow.py,sha256=GL8q7RX9fGdXG3iVyJpCeLXbWa-2qI_SIxqhzxs9VK8,189
73
- ocrd_network/models/workspace.py,sha256=7kjCTY0ixqcyJP9eHnptkrJnPTCD3zFNfhApZz9w6OU,1568
71
+ ocrd_network/models/workspace.py,sha256=rZcBWNlQOZX2KukP79IDRrXJvZ-H5pPH3WpPuw72HBM,1596
74
72
  ocrd_network/rabbitmq_utils/__init__.py,sha256=XLIqZhfin4I4m80G9B__UcP45Lz10_mEpMYLXGOByUk,741
75
73
  ocrd_network/rabbitmq_utils/connector.py,sha256=N6mzjIf5FkVIno3FI1AksZY4F5jMUAm8baay0nXZx8w,11343
76
74
  ocrd_network/rabbitmq_utils/constants.py,sha256=Zu_dKJASfrgnIvEZZlFX9uDR9y6w7zy0KhW7gP7wHDE,1063
@@ -91,9 +89,9 @@ ocrd_utils/constants.py,sha256=ImbG1d8t2MW3uuFi-mN6aY90Zn74liAKZBKlfuKN86w,3278
91
89
  ocrd_utils/deprecate.py,sha256=luAqGWUSF-9DHmTd2lDiQoQPA5SrJazdoDPQYQ6A7Z4,1029
92
90
  ocrd_utils/image.py,sha256=tG5WnNtrrvGjm2-r6NVs1Jm7z8fee3MuLKotAD6C2RU,24818
93
91
  ocrd_utils/introspect.py,sha256=LPhgcUuoicQcURDCWlCpSdbfVyxID5vmQPXJ9vzuYV0,1977
94
- ocrd_utils/logging.py,sha256=Kj_z92pXbzWmc0jMJ299Pup9hfcnzJH8ltvI9w7STZc,7824
92
+ ocrd_utils/logging.py,sha256=-cCi_9kIzmLUixfnDcx2jq9IQuwMqrU-71RJhKOQilQ,7929
95
93
  ocrd_utils/ocrd_logging.conf,sha256=JlWmA_5vg6HnjPGjTC4mA5vFHqmnEinwllSTiOw5CCo,3473
96
- ocrd_utils/os.py,sha256=GstXB4i3kDBy7PXU-TaPYV4BI-lkqd_cYKl6uDkVMkw,9829
94
+ ocrd_utils/os.py,sha256=QEOramsUmBDzZxslPMZhfTviPr7EnopXKEgNc5zwwTs,9817
97
95
  ocrd_utils/str.py,sha256=4P0MdX0LCTqDTnsi_y5wNOBXW_TuTFANF7NYRXjo4x0,10136
98
96
  ocrd_validators/__init__.py,sha256=ZFc-UqRVBk9o1YesZFmr9lOepttNJ_NKx1Zdb7g_YsU,972
99
97
  ocrd_validators/bagit-profile.yml,sha256=sdQJlSi7TOn1E9WYMOZ1shewJ-i_nPaKmsAFkh28TGY,1011
@@ -117,9 +115,9 @@ ocrd_validators/xlink.xsd,sha256=8fW7YAMWXN2PbB_MMvj9H5ZeFoEBDzuYBtlGC8_6ijw,318
117
115
  ocrd_validators/xsd_mets_validator.py,sha256=YgiuNtwNDtn3LuvdFFscnmsGREF_wQ4wtA76yE2Iljw,469
118
116
  ocrd_validators/xsd_page_validator.py,sha256=ggt-nmaz-DDyAPwm3ZMVvtChuV2BJ2ZEEbWpePL9vTk,469
119
117
  ocrd_validators/xsd_validator.py,sha256=ahJo_oVvTK_JB0Cu4CkMC8l_gbzsyW91AxGtelMjqrg,2115
120
- ocrd-3.6.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
121
- ocrd-3.6.0.dist-info/METADATA,sha256=Qc1Nap_yo-Y3_8FYaVIvGXV79q4NKrCZs6h4BEKXIg4,10523
122
- ocrd-3.6.0.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
123
- ocrd-3.6.0.dist-info/entry_points.txt,sha256=4hcJ2LkK_OlIabHnKgFit35Ap7b5Lz1Gb4hzkxV0Kiw,152
124
- ocrd-3.6.0.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
125
- ocrd-3.6.0.dist-info/RECORD,,
118
+ ocrd-3.7.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
119
+ ocrd-3.7.0.dist-info/METADATA,sha256=yCPxM32p95ODwi46-eVJxTjxCheoiuxOZujpKYmEzIA,10340
120
+ ocrd-3.7.0.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
121
+ ocrd-3.7.0.dist-info/entry_points.txt,sha256=4hcJ2LkK_OlIabHnKgFit35Ap7b5Lz1Gb4hzkxV0Kiw,152
122
+ ocrd-3.7.0.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
123
+ ocrd-3.7.0.dist-info/RECORD,,
@@ -13,7 +13,7 @@ class PYJobInput(BaseModel):
13
13
  workspace_id: Optional[str] = None
14
14
  description: Optional[str] = None
15
15
  input_file_grps: List[str]
16
- output_file_grps: Optional[List[str]]
16
+ output_file_grps: Optional[List[str]] = None
17
17
  page_id: Optional[str] = None
18
18
  parameters: dict = {} # Always set to empty dict when None, otherwise it fails ocr-d-validation
19
19
  result_queue_name: Optional[str] = None
@@ -23,8 +23,8 @@ class PYJobInput(BaseModel):
23
23
  # If set, specifies a list of job ids this job depends on
24
24
  depends_on: Optional[List[str]] = None
25
25
 
26
- class Config:
27
- schema_extra = {
26
+ model_config = {
27
+ 'json_schema_extra': {
28
28
  'example': {
29
29
  'path_to_mets': '/path/to/mets.xml',
30
30
  'description': 'The description of this execution',
@@ -34,6 +34,7 @@ class PYJobInput(BaseModel):
34
34
  'parameters': {}
35
35
  }
36
36
  }
37
+ }
37
38
 
38
39
 
39
40
  class PYJobOutput(BaseModel):
@@ -42,12 +43,12 @@ class PYJobOutput(BaseModel):
42
43
  job_id: str
43
44
  processor_name: str
44
45
  state: JobState = JobState.unset
45
- path_to_mets: Optional[str]
46
- workspace_id: Optional[str]
46
+ path_to_mets: Optional[str] = None
47
+ workspace_id: Optional[str] = None
47
48
  input_file_grps: List[str]
48
- output_file_grps: Optional[List[str]]
49
+ output_file_grps: Optional[List[str]] = None
49
50
  page_id: Optional[str] = None
50
- log_file_path: Optional[str]
51
+ log_file_path: Optional[str] = None
51
52
 
52
53
 
53
54
  class DBProcessorJob(Document):
@@ -55,22 +56,22 @@ class DBProcessorJob(Document):
55
56
  """
56
57
  job_id: str
57
58
  processor_name: str
58
- path_to_mets: Optional[str]
59
- workspace_id: Optional[str]
60
- description: Optional[str]
59
+ path_to_mets: Optional[str] = None
60
+ workspace_id: Optional[str] = None
61
+ description: Optional[str] = None
61
62
  state: JobState = JobState.unset
62
63
  input_file_grps: List[str]
63
- output_file_grps: Optional[List[str]]
64
- page_id: Optional[str]
65
- parameters: Optional[dict]
66
- depends_on: Optional[List[str]]
67
- result_queue_name: Optional[str]
68
- callback_url: Optional[str]
69
- internal_callback_url: Optional[str]
70
- start_time: Optional[datetime]
71
- end_time: Optional[datetime]
72
- exec_time: Optional[str]
73
- log_file_path: Optional[str]
64
+ output_file_grps: Optional[List[str]] = None
65
+ page_id: Optional[str] = None
66
+ parameters: Optional[dict] = None
67
+ depends_on: Optional[List[str]] = None
68
+ result_queue_name: Optional[str] = None
69
+ callback_url: Optional[str] = None
70
+ internal_callback_url: Optional[str] = None
71
+ start_time: Optional[datetime] = None
72
+ end_time: Optional[datetime] = None
73
+ exec_time: Optional[str] = None
74
+ log_file_path: Optional[str] = None
74
75
 
75
76
  class Settings:
76
77
  use_enum_values = True
@@ -99,9 +100,9 @@ class PYWorkflowJobOutput(BaseModel):
99
100
  page_id: str
100
101
  page_wise: bool = False
101
102
  job_id: str
102
- path_to_mets: Optional[str]
103
- workspace_id: Optional[str]
104
- description: Optional[str]
103
+ path_to_mets: Optional[str] = None
104
+ workspace_id: Optional[str] = None
105
+ description: Optional[str] = None
105
106
 
106
107
 
107
108
  class DBWorkflowJob(Document):
@@ -114,10 +115,10 @@ class DBWorkflowJob(Document):
114
115
  # key: page_id
115
116
  # value: List of processing job ids sorted in dependency order
116
117
  processing_job_ids: Dict
117
- path_to_mets: Optional[str]
118
- workspace_id: Optional[str]
119
- description: Optional[str]
120
- workflow_callback_url: Optional[str]
118
+ path_to_mets: Optional[str] = None
119
+ workspace_id: Optional[str] = None
120
+ description: Optional[str] = None
121
+ workflow_callback_url: Optional[str] = None
121
122
 
122
123
  class Settings:
123
124
  use_enum_values = True
@@ -11,8 +11,8 @@ class PYResultMessage(BaseModel):
11
11
  path_to_mets: Optional[str] = None
12
12
  workspace_id: Optional[str] = None
13
13
 
14
- class Config:
15
- schema_extra = {
14
+ model_config = {
15
+ "json_schema_extra": {
16
16
  "example": {
17
17
  "job_id": "d8e36726-ed28-5476-b83c-bc31d2eecf1f",
18
18
  "state": JobState.success,
@@ -20,3 +20,4 @@ class PYResultMessage(BaseModel):
20
20
  "workspace_id": "c7f25615-fc17-4365-a74d-ad20e1ddbd0e"
21
21
  }
22
22
  }
23
+ }
@@ -25,10 +25,10 @@ class DBWorkspace(Document):
25
25
  workspace_mets_path: str
26
26
  ocrd_identifier: str
27
27
  bagit_profile_identifier: str
28
- ocrd_base_version_checksum: Optional[str]
29
- ocrd_mets: Optional[str]
30
- bag_info_adds: Optional[dict]
31
- mets_server_url: Optional[str]
28
+ ocrd_base_version_checksum: Optional[str] = None
29
+ ocrd_mets: Optional[str] = None
30
+ bag_info_adds: Optional[dict] = None
31
+ mets_server_url: Optional[str] = None
32
32
  deleted: bool = False
33
33
 
34
34
  class Settings:
ocrd_utils/logging.py CHANGED
@@ -75,6 +75,9 @@ _ocrdLevel2pythonLevel = {
75
75
 
76
76
 
77
77
  def tf_disable_interactive_logs():
78
+ """
79
+ Disable the interactive logging of tf/keras and set the log level to error or higher
80
+ """
78
81
  try:
79
82
  from os import environ # pylint: disable=import-outside-toplevel
80
83
  # This env variable must be set before importing from Keras
ocrd_utils/os.py CHANGED
@@ -171,7 +171,7 @@ def list_all_resources(executable, moduled=None, xdg_data_home=None):
171
171
  # files over directories; but we want data files only
172
172
  # todo: more code and cache exclusion patterns!
173
173
  ['*.py', '*.py[cod]', '*~', 'ocrd-tool.json',
174
- 'environment.pickle', 'resource_list.yml', 'lib.bash']):
174
+ 'environment.pickle', 'resource_list.yml']):
175
175
  continue
176
176
  candidates.append(resource)
177
177
  # recurse once
ocrd/cli/log.py DELETED
@@ -1,56 +0,0 @@
1
- """
2
- OCR-D CLI: Logging
3
-
4
- .. click:: ocrd.cli.log:log_cli
5
- :prog: ocrd log
6
- :nested: full
7
- """
8
- import click
9
- from ocrd_utils import initLogging, getLogger, getLevelName
10
-
11
-
12
- class LogCtx():
13
-
14
- def __init__(self, name):
15
- self.name = name
16
-
17
- def log(self, lvl, *args, **kwargs):
18
- logger = getLogger(self.name)
19
- logger.log(getLevelName(lvl), *args, **kwargs)
20
-
21
-
22
- pass_log = click.make_pass_decorator(LogCtx)
23
-
24
-
25
- @click.group("log")
26
- @click.option('-n', '--name', envvar='OCRD_TOOL_NAME', default='log_cli', metavar='LOGGER_NAME',
27
- help='Name of the logger', show_default=True)
28
- @click.pass_context
29
- def log_cli(ctx, name, *args, **kwargs):
30
- """
31
- Logging
32
-
33
- Logger name will be ocrd.OCRD_TOOL_NAME where OCRD_TOOL_NAME is normally
34
- (when using bashlib) the name of the processor.
35
- """
36
- initLogging()
37
- ctx.obj = LogCtx('ocrd.' + name)
38
-
39
-
40
- def _bind_log_command(lvl):
41
- @click.argument('msgs', nargs=-1)
42
- @pass_log
43
- def _log_wrapper(ctx, msgs):
44
- if not msgs:
45
- ctx.log(lvl.upper(), '')
46
- elif len(msgs) == 1 and msgs[0] == '-':
47
- for stdin_line in click.get_text_stream('stdin'):
48
- ctx.log(lvl.upper(), stdin_line.rstrip('\n'))
49
- else:
50
- msg = list(msgs) if '%s' in msgs[0] else ' '.join([x.replace('%', '%%') for x in msgs])
51
- ctx.log(lvl.upper(), msg)
52
- return _log_wrapper
53
-
54
-
55
- for _lvl in ['trace', 'debug', 'info', 'warning', 'error', 'critical']:
56
- log_cli.command(_lvl, help="Log a %s message" % _lvl.upper())(_bind_log_command(_lvl))
ocrd/lib.bash DELETED
@@ -1,310 +0,0 @@
1
- ((BASH_VERSINFO<4 || BASH_VERSINFO==4 && BASH_VERSINFO[1]<4)) && \
2
- echo >&2 "bash $BASH_VERSION is too old. Please install bash 4.4 or newer." && \
3
- exit 1
4
-
5
- ## ### `ocrd__raise`
6
- ##
7
- ## Raise an error and exit.
8
- ocrd__raise () {
9
- echo >&2 "ERROR: $1"; exit 127
10
- }
11
-
12
- ## ### `ocrd__log`
13
- ##
14
- ## Delegate logging to `ocrd log`
15
- ocrd__log () {
16
- local log_level="${ocrd__argv[log_level]:-}"
17
- if [[ -n "$log_level" ]];then
18
- ocrd -l "$log_level" log "$@"
19
- else
20
- ocrd log "$@"
21
- fi
22
- }
23
-
24
-
25
- ## ### `ocrd__minversion`
26
- ##
27
- ## Ensure minimum version
28
- # ht https://stackoverflow.com/posts/4025065
29
- ocrd__minversion () {
30
- set -e
31
- local minversion_raw="$1"
32
- local version_raw=$(ocrd --version|sed 's/ocrd, version //')
33
- local version_mmp=$(echo "$version_raw" | grep -Eo '([0-9]+\.?){3}')
34
- local version_prerelease_suffix="${version_raw#$version_mmp}"
35
- if [[ -z $version_prerelease_suffix ]];then
36
- version_prerelease_suffix=0
37
- fi
38
- local minversion_mmp=$(echo "$minversion_raw" | grep -Eo '([0-9]+\.?){3}')
39
- local minversion_prerelease_suffix="${minversion_raw#$minversion_mmp}"
40
- if [[ -z $minversion_prerelease_suffix ]];then
41
- minversion_prerelease_suffix=0
42
- fi
43
- local IFS='.'
44
- version=($version_mmp)
45
- minversion=($minversion_mmp)
46
- # MAJOR > MAJOR
47
- if (( ${version[0]} > ${minversion[0]} ));then
48
- return
49
- # MAJOR == MAJOR
50
- elif (( ${version[0]} == ${minversion[0]} ));then
51
- # MINOR > MINOR
52
- if (( ${version[1]} > ${minversion[1]} ));then
53
- return
54
- # MINOR == MINOR
55
- elif (( ${version[1]} == ${minversion[1]} ));then
56
- # PATCH > PATCH
57
- if (( ${version[2]} > ${minversion[2]} ));then
58
- return
59
- elif (( ${version[2]} == ${minversion[2]}));then
60
- # Match prerelease suffix like a1, b1 alphabetically
61
- if [ "$version_prerelease_suffix" = "$minversion_prerelease_suffix" -o "$version_prerelease_suffix" \> "$minversion_prerelease_suffix" ]; then
62
- return
63
- fi
64
- fi
65
- fi
66
- fi
67
- ocrd__raise "ocrd/core is too old ($version_raw < $minversion_raw). Please update OCR-D/core"
68
- }
69
-
70
- ## ### `ocrd__dumpjson`
71
- ##
72
- ## Output ocrd-tool.json.
73
- ##
74
- ## Requires `$OCRD_TOOL_JSON` and `$OCRD_TOOL_NAME` to be set:
75
- ##
76
- ## ```sh
77
- ## export OCRD_TOOL_JSON=/path/to/ocrd-tool.json
78
- ## export OCRD_TOOL_NAME=ocrd-foo-bar
79
- ## ```
80
- ##
81
- ocrd__dumpjson () {
82
- ocrd ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" dump
83
- }
84
-
85
- ##
86
- ## Output file resource path.
87
- ##
88
- ocrd__resolve_resource () {
89
- ocrd ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" resolve-resource "$1"
90
- }
91
-
92
- ##
93
- ## Output file resource content.
94
- ##
95
- ocrd__show_resource () {
96
- ocrd ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" show-resource "$1"
97
- }
98
-
99
- ##
100
- ## Output file resources names.
101
- ##
102
- ocrd__list_resources () {
103
- ocrd ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" list-resources
104
- }
105
-
106
- ## ### `ocrd__usage`
107
- ##
108
- ## Print usage
109
- ##
110
- ocrd__usage () {
111
- declare -a _args=(ocrd-tool "$OCRD_TOOL_JSON" tool "$OCRD_TOOL_NAME" help)
112
- if [ -v ocrd__subcommand ];then
113
- _args+=($ocrd__subcommand)
114
- fi
115
- ocrd ${_args[@]}
116
- }
117
-
118
- ## ### `ocrd__parse_argv`
119
- ##
120
- ## Expects an associative array ("hash"/"dict") `ocrd__argv` to be defined:
121
- ##
122
- ## ```sh
123
- ## declare -A ocrd__argv=()
124
- ## ```
125
- ocrd__parse_argv () {
126
- set -e
127
-
128
- # if [[ -n "$ZSH_VERSION" ]];then
129
- # print -r -- ${+ocrd__argv} ${(t)ocrd__argv}
130
- # fi
131
- if ! declare -p "ocrd__argv" >/dev/null 2>/dev/null ;then
132
- ocrd__raise "Must set \$ocrd__argv (declare -A ocrd__argv)"
133
- fi
134
-
135
- if ! declare -p "params" >/dev/null 2>/dev/null ;then
136
- ocrd__raise "Must set \$params (declare -A params)"
137
- fi
138
-
139
- if ! declare -p "params_json" >/dev/null 2>/dev/null ;then
140
- ocrd__raise "Must set \$params_json (declare params_json)"
141
- fi
142
-
143
- if [[ $# = 0 ]];then
144
- ocrd__usage
145
- exit 1
146
- fi
147
-
148
- ocrd__argv[debug]=false
149
- ocrd__argv[overwrite]=false
150
- ocrd__argv[profile]=false
151
- ocrd__argv[profile_file]=
152
- ocrd__argv[mets_server_url]=
153
- ocrd__argv[mets_file]="$PWD/mets.xml"
154
-
155
- local __parameters=()
156
- local __parameter_overrides=()
157
-
158
- if [[ $1 == 'worker' || $1 == 'server' ]];then
159
- ocrd__subcommand="$1" ; shift ;
160
- fi
161
-
162
- while [[ "${1:-}" = -* ]];do
163
- case "$1" in
164
- -l|--log-level) ocrd__argv[log_level]=$2 ; shift ;;
165
- --log-filename) exec 2> "$2" ; shift ;;
166
- -h|--help|--usage) ocrd__usage; exit ;;
167
- -J|--dump-json) ocrd__dumpjson; exit ;;
168
- -D|--dump-module-dir) echo $(dirname "$OCRD_TOOL_JSON"); exit ;;
169
- -C|--show-resource) ocrd__show_resource "$2"; exit ;;
170
- -L|--list-resources) ocrd__list_resources; exit ;;
171
- -p|--parameter) __parameters+=(-p "$(ocrd__resolve_resource "$2" 2>/dev/null || echo "$2")") ; shift ;;
172
- -P|--parameter-override) __parameter_overrides+=(-P "$2" "$3") ; shift ; shift ;;
173
- -g|--page-id) ocrd__argv[page_id]=$2 ; shift ;;
174
- -O|--output-file-grp) ocrd__argv[output_file_grp]=$2 ; shift ;;
175
- -I|--input-file-grp) ocrd__argv[input_file_grp]=$2 ; shift ;;
176
- -w|--working-dir) ocrd__argv[working_dir]=$(realpath "$2") ; shift ;;
177
- -m|--mets) ocrd__argv[mets_file]=$(realpath "$2") ; shift ;;
178
- -U|--mets-server-url) ocrd__argv[mets_server_url]="$2" ; shift ;;
179
- --debug) ocrd__argv[debug]=true ;;
180
- --overwrite) ocrd__argv[overwrite]=true ;;
181
- --profile) ocrd__argv[profile]=true ;;
182
- --profile-file) ocrd__argv[profile_file]=$(realpath "$2") ; shift ;;
183
- -V|--version) ocrd ocrd-tool "$OCRD_TOOL_JSON" version; exit ;;
184
- --queue) ocrd__worker_queue="$2" ; shift ;;
185
- --database) ocrd__worker_database="$2" ; shift ;;
186
- *) ocrd__raise "Unknown option '$1'" ;;
187
- esac
188
- shift
189
- done
190
-
191
- if [ -v ocrd__worker_queue -o -v ocrd__worker_database -o -v ocrd__subcommand ]; then
192
- if ! [ -v ocrd__subcommand ] ; then
193
- ocrd__raise "Provide subcommand 'worker' for Processing Worker"
194
- elif ! [ -v ocrd__worker_database ]; then
195
- ocrd__raise "For the Processing Worker --database is required"
196
- elif ! [ -v ocrd__worker_queue ]; then
197
- ocrd__raise "For the Processing Worker --queue is required"
198
- fi
199
- if [ ${ocrd__subcommand} = "worker" ]; then
200
- ocrd network processing-worker $OCRD_TOOL_NAME --queue "${ocrd__worker_queue}" --database "${ocrd__worker_database}"
201
- else
202
- ocrd__raise "subcommand must be 'worker' not '${ocrd__subcommand}'"
203
- fi
204
- exit
205
- fi
206
-
207
- if [[ ! -e "${ocrd__argv[mets_file]}" ]]; then
208
- ocrd__raise "METS file '${ocrd__argv[mets_file]}' not found"
209
- fi
210
-
211
- if [[ ! -d "${ocrd__argv[working_dir]:=$(dirname "${ocrd__argv[mets_file]}")}" ]]; then
212
- ocrd__raise "workdir '${ocrd__argv[working_dir]}' not a directory. Use -w/--working-dir to set correctly"
213
- fi
214
-
215
- if [[ ! "${ocrd__argv[log_level]:=INFO}" =~ OFF|ERROR|WARN|INFO|DEBUG|TRACE ]]; then
216
- ocrd__raise "log level '${ocrd__argv[log_level]}' is invalid"
217
- fi
218
-
219
- if [[ -z "${ocrd__argv[input_file_grp]:=}" ]]; then
220
- ocrd__raise "Provide --input-file-grp/-I explicitly!"
221
- fi
222
-
223
- if [[ -z "${ocrd__argv[output_file_grp]:=}" ]]; then
224
- ocrd__raise "Provide --output-file-grp/-O explicitly!"
225
- fi
226
-
227
- # enable profiling (to be extended/acted upon by caller)
228
- if [[ ${ocrd__argv[profile]} = true ]]; then
229
- if [[ -n "${ocrd__argv[profile_file]}" ]]; then
230
- exec 3> "${ocrd__argv[profile_file]}"
231
- else
232
- exec 3>&2
233
- fi
234
- BASH_XTRACEFD=3
235
- # just the builtin tracer (without timing):
236
- #set -x
237
- # our own (including timing):
238
- DEPTH=+++++++++++
239
- shopt -s extdebug
240
- showtime() { date "+${DEPTH:0:$BASH_SUBSHELL+1} %H:%M:%S $BASH_COMMAND" >&3; }
241
- declare +t showtime # no trace here
242
- trap showtime DEBUG
243
- fi
244
-
245
- # check parameters
246
- local params_parsed retval
247
- params_parsed="$(ocrd ocrd-tool "$OCRD_TOOL_JSON" tool $OCRD_TOOL_NAME parse-params "${__parameters[@]}" "${__parameter_overrides[@]}")" || {
248
- retval=$?
249
- ocrd__raise "Failed to parse parameters (retval $retval):
250
- $params_parsed"
251
- }
252
- eval "$params_parsed"
253
- params_json="$(ocrd ocrd-tool "$OCRD_TOOL_JSON" tool $OCRD_TOOL_NAME parse-params --json "${__parameters[@]}" "${__parameter_overrides[@]}")"
254
-
255
- }
256
-
257
- ocrd__wrap () {
258
- set -e
259
-
260
- declare -gx OCRD_TOOL_JSON="$1"
261
- declare -gx OCRD_TOOL_NAME="$2"
262
- shift
263
- shift
264
- declare -Agx params
265
- params=()
266
- declare -g params_json
267
- declare -Agx ocrd__argv
268
- ocrd__argv=()
269
-
270
- if ! which "ocrd" >/dev/null 2>/dev/null;then
271
- ocrd__raise "ocrd not in \$PATH"
272
- fi
273
-
274
- if ! declare -p "OCRD_TOOL_JSON" >/dev/null 2>/dev/null;then
275
- ocrd__raise "Must set \$OCRD_TOOL_JSON"
276
- elif [[ ! -r "$OCRD_TOOL_JSON" ]];then
277
- ocrd__raise "Cannot read \$OCRD_TOOL_JSON: '$OCRD_TOOL_JSON'"
278
- fi
279
-
280
- if [[ -z "$OCRD_TOOL_NAME" ]];then
281
- ocrd__raise "Must set \$OCRD_TOOL_NAME"
282
- elif ! ocrd ocrd-tool "$OCRD_TOOL_JSON" list-tools|grep -q "$OCRD_TOOL_NAME";then
283
- ocrd__raise "No such command \$OCRD_TOOL_NAME: $OCRD_TOOL_NAME"
284
- fi
285
-
286
- ocrd__parse_argv "$@"
287
-
288
- declare -ag ocrd__files
289
- IFS=$'\n'
290
- ocrd__files=( $(ocrd bashlib input-files \
291
- --ocrd-tool $OCRD_TOOL_JSON \
292
- --executable $OCRD_TOOL_NAME \
293
- $(if [[ ${ocrd__argv[debug]} = true ]]; then echo --debug; fi) \
294
- $(if [[ ${ocrd__argv[overwrite]} = true ]]; then echo --overwrite; fi) \
295
- -m "${ocrd__argv[mets_file]}" \
296
- -d "${ocrd__argv[working_dir]}" \
297
- ${ocrd__argv[mets_server_url]:+-U} ${ocrd__argv[mets_server_url]:-} \
298
- -p "$params_json" \
299
- -I "${ocrd__argv[input_file_grp]}" \
300
- -O "${ocrd__argv[output_file_grp]}" \
301
- ${ocrd__argv[page_id]:+-g} ${ocrd__argv[page_id]:-}) )
302
- IFS=$' \t\n'
303
- }
304
-
305
- ## usage: pageId=$(ocrd__input_file 3 pageId)
306
- ocrd__input_file() {
307
- declare -A input_file
308
- eval input_file=( "${ocrd__files[$1]}" )
309
- eval echo "${input_file[$2]}"
310
- }
File without changes
File without changes