ocrd 3.6.0__py3-none-any.whl → 3.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocrd/cli/__init__.py +2 -4
- ocrd/cli/bashlib.py +6 -117
- ocrd/cli/network.py +2 -0
- ocrd/cli/resmgr.py +29 -65
- ocrd/constants.py +0 -2
- ocrd/mets_server.py +5 -5
- ocrd/processor/base.py +6 -16
- ocrd/processor/builtin/dummy/ocrd-tool.json +25 -0
- ocrd/processor/builtin/merge_processor.py +131 -0
- ocrd/processor/builtin/param_command_header2unordered.json +7 -0
- ocrd/processor/builtin/param_command_heading2unordered.json +7 -0
- ocrd/processor/builtin/param_command_lines2orientation.json +6 -0
- ocrd/processor/builtin/param_command_page-update-version.json +5 -0
- ocrd/processor/builtin/param_command_transkribus-to-prima.json +8 -0
- ocrd/processor/builtin/shell_processor.py +128 -0
- ocrd/resource_manager.py +213 -124
- {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/METADATA +23 -10
- {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/RECORD +40 -34
- {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/entry_points.txt +2 -0
- ocrd_models/ocrd_agent.py +3 -3
- ocrd_network/__init__.py +1 -0
- ocrd_network/cli/__init__.py +2 -0
- ocrd_network/cli/resmgr_server.py +23 -0
- ocrd_network/constants.py +3 -0
- ocrd_network/logging_utils.py +5 -0
- ocrd_network/models/job.py +29 -28
- ocrd_network/models/messages.py +3 -2
- ocrd_network/models/workspace.py +4 -4
- ocrd_network/resource_manager_server.py +182 -0
- ocrd_network/runtime_data/connection_clients.py +1 -1
- ocrd_network/runtime_data/hosts.py +43 -16
- ocrd_network/runtime_data/network_agents.py +15 -1
- ocrd_utils/__init__.py +5 -1
- ocrd_utils/constants.py +5 -0
- ocrd_utils/logging.py +3 -0
- ocrd_utils/os.py +142 -62
- ocrd_validators/ocrd_tool.schema.yml +7 -4
- ocrd/cli/log.py +0 -56
- ocrd/lib.bash +0 -310
- ocrd/resource_list.yml +0 -61
- {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/LICENSE +0 -0
- {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/WHEEL +0 -0
- {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ocrd
|
|
3
|
-
Version: 3.6.0
|
|
3
|
+
Version: 3.8.0
|
|
4
4
|
Summary: OCR-D framework
|
|
5
5
|
Author-email: Konstantin Baierer <unixprog@gmail.com>
|
|
6
6
|
License: Apache License 2.0
|
|
@@ -16,12 +16,13 @@ Requires-Dist: beanie~=1.7
|
|
|
16
16
|
Requires-Dist: click>=7
|
|
17
17
|
Requires-Dist: cryptography<43.0.0
|
|
18
18
|
Requires-Dist: Deprecated==1.2.0
|
|
19
|
-
Requires-Dist: docker
|
|
19
|
+
Requires-Dist: docker>=7.1.0
|
|
20
20
|
Requires-Dist: elementpath
|
|
21
21
|
Requires-Dist: fastapi>=0.78.0
|
|
22
22
|
Requires-Dist: filetype
|
|
23
23
|
Requires-Dist: Flask
|
|
24
24
|
Requires-Dist: frozendict>=2.4.0
|
|
25
|
+
Requires-Dist: gitpython
|
|
25
26
|
Requires-Dist: gdown
|
|
26
27
|
Requires-Dist: httpx>=0.22.0
|
|
27
28
|
Requires-Dist: jsonschema>=4
|
|
@@ -35,7 +36,7 @@ Requires-Dist: opencv-python-headless
|
|
|
35
36
|
Requires-Dist: paramiko
|
|
36
37
|
Requires-Dist: pika>=1.2.0
|
|
37
38
|
Requires-Dist: Pillow>=7.2.0
|
|
38
|
-
Requires-Dist: pydantic
|
|
39
|
+
Requires-Dist: pydantic>=2.0.0
|
|
39
40
|
Requires-Dist: python-magic
|
|
40
41
|
Requires-Dist: python-multipart
|
|
41
42
|
Requires-Dist: pyyaml
|
|
@@ -68,6 +69,9 @@ Requires-Dist: shapely>=2; python_version >= "3.9"
|
|
|
68
69
|
* [Command line tools](#command-line-tools)
|
|
69
70
|
* [`ocrd` CLI](#ocrd-cli)
|
|
70
71
|
* [`ocrd-dummy` CLI](#ocrd-dummy-cli)
|
|
72
|
+
* [`ocrd-filter` CLI](#ocrd-filter-cli)
|
|
73
|
+
* [`ocrd-command` CLI](#ocrd-command-cli)
|
|
74
|
+
* [`ocrd-merge` CLI](#ocrd-merge-cli)
|
|
71
75
|
* [Configuration](#configuration)
|
|
72
76
|
* [Packages](#packages)
|
|
73
77
|
* [ocrd_utils](#ocrd_utils)
|
|
@@ -76,7 +80,6 @@ Requires-Dist: shapely>=2; python_version >= "3.9"
|
|
|
76
80
|
* [ocrd_validators](#ocrd_validators)
|
|
77
81
|
* [ocrd_network](#ocrd_network)
|
|
78
82
|
* [ocrd](#ocrd)
|
|
79
|
-
* [bash library](#bash-library)
|
|
80
83
|
* [Testing](#testing)
|
|
81
84
|
* [See Also](#see-also)
|
|
82
85
|
|
|
@@ -121,6 +124,22 @@ supported flags, options and arguments.
|
|
|
121
124
|
|
|
122
125
|
A minimal [OCR-D processor](https://ocr-d.de/en/user_guide#using-the-ocr-d-processors) that copies from `-I/-input-file-grp` to `-O/-output-file-grp`
|
|
123
126
|
|
|
127
|
+
### `ocrd-filter` CLI
|
|
128
|
+
|
|
129
|
+
A simple [OCR-D processor](https://ocr-d.de/en/user_guide#using-the-ocr-d-processors) that removes segments in PAGE-XML files from `-I/-input-file-grp` to `-O/-output-file-grp` with arbitrary selection based on powerful XPath 2.0 expressions.
|
|
130
|
+
|
|
131
|
+
### `ocrd-command` CLI
|
|
132
|
+
|
|
133
|
+
A simple [OCR-D processor](https://ocr-d.de/en/user_guide#using-the-ocr-d-processors) that runs arbitrary shell commands to transform PAGE-XML files from `-I/-input-file-grp` to `-O/-output-file-grp` (in effect "wrapping" them for OCR-D).
|
|
134
|
+
|
|
135
|
+
### `ocrd-merge` CLI
|
|
136
|
+
|
|
137
|
+
A simple [OCR-D processor](https://ocr-d.de/en/user_guide#using-the-ocr-d-processors) that (for every page) joins PAGE-XML files from multiple `-I/-input-file-grp` into a single `-O/-output-file-grp`, ensuring that
|
|
138
|
+
- `Border` polygons are joined
|
|
139
|
+
- all regions are concatenated, while
|
|
140
|
+
- ensuring segment identifiers do not clash,
|
|
141
|
+
- and the reading order simply gets concatenated.
|
|
142
|
+
|
|
124
143
|
## Configuration
|
|
125
144
|
|
|
126
145
|
Almost all behaviour of the OCR-D/core software is configured via CLI options and flags, which can be listed with the `--help` flag that all CLIs support.
|
|
@@ -220,12 +239,6 @@ Also contains the command line tool `ocrd`.
|
|
|
220
239
|
|
|
221
240
|
See [README for `ocrd`](./README_ocrd.md) for further information.
|
|
222
241
|
|
|
223
|
-
## bash library
|
|
224
|
-
|
|
225
|
-
Builds a bash script that can be sourced by other bash scripts to create OCRD-compliant CLI.
|
|
226
|
-
|
|
227
|
-
See [README for `bashlib`](./README_bashlib.md) for further information.
|
|
228
|
-
|
|
229
242
|
## Testing
|
|
230
243
|
|
|
231
244
|
Download assets (`make assets`)
|
|
@@ -1,22 +1,19 @@
|
|
|
1
1
|
ocrd/__init__.py,sha256=ZswMVmlqFhAEIzMR3my6IKPq9XLH21aDPC_m_8Jh4dA,1076
|
|
2
|
-
ocrd/constants.py,sha256=
|
|
3
|
-
ocrd/
|
|
4
|
-
ocrd/mets_server.py,sha256=eXIbSip6gYi5RyJZlriIrR9lUGpJjL0kXq3UDZPeqVs,22274
|
|
2
|
+
ocrd/constants.py,sha256=REPY-y28MMsrTWBNB4oOsvX3W06Xr2fvtv9wuWH9oAI,633
|
|
3
|
+
ocrd/mets_server.py,sha256=LbZ0U2_o0W7cWO639U7E816dXabro8-8yHGX0quvHn4,22304
|
|
5
4
|
ocrd/ocrd-all-tool.json,sha256=EYXmMzP68p3KzL8nUZ16TCX2chQzKkAeISvuXqI_yIw,2094
|
|
6
5
|
ocrd/resolver.py,sha256=7uwHRxaK8YMdKHe_a2dfrcNwL6UhQRJRVBrIX7GST7Q,15443
|
|
7
|
-
ocrd/
|
|
8
|
-
ocrd/resource_manager.py,sha256=kIWDoKxWH4IJE1gcoTcCRQjYjieCqiQclyuyF6Y9b8A,16813
|
|
6
|
+
ocrd/resource_manager.py,sha256=2wo3JSCYE1oA0VgI8H901IsC-fnx6vRJ5qSMFgYNorE,20664
|
|
9
7
|
ocrd/task_sequence.py,sha256=r4e4iaP9AXzTL2xQZpfYnHuFXty5pE-ym3gIyUz1aJc,7180
|
|
10
8
|
ocrd/workspace.py,sha256=UL_gX0KA-MmpayBl9KGYTfcl-1Canj8S991G9RHhu70,65216
|
|
11
9
|
ocrd/workspace_backup.py,sha256=aUOnYeJ-nWu-Zve27B0cYd9ZtBkmQX4F4Wim2UcrR5I,3624
|
|
12
10
|
ocrd/workspace_bagger.py,sha256=4viSQoWteW0V4B_blB6asJXd4-qniGGJyCPfKnrsyrY,12054
|
|
13
|
-
ocrd/cli/__init__.py,sha256
|
|
14
|
-
ocrd/cli/bashlib.py,sha256=
|
|
15
|
-
ocrd/cli/
|
|
16
|
-
ocrd/cli/network.py,sha256=iQ0AhQRGvIFyJY9RBArUiA_wuz7IfNKvU4L8KpVggnY,530
|
|
11
|
+
ocrd/cli/__init__.py,sha256=-n2jpGBZs_OMpI31E7CljGVdoFxDhgCAYwibcl_vp1Q,2838
|
|
12
|
+
ocrd/cli/bashlib.py,sha256=sEpTKbqM5DEo6838Ki5aFU8QsokA2SqQ841gcBu7M5M,1148
|
|
13
|
+
ocrd/cli/network.py,sha256=HA-JeyedsZksTQzuDoSBwyxEHIyIlc7oCmvNNDMA4vA,615
|
|
17
14
|
ocrd/cli/ocrd_tool.py,sha256=kB3Y3tj7Fpz6Ts4KgVlznhXpAx8gCDvJTnO39j8SGL4,7679
|
|
18
15
|
ocrd/cli/process.py,sha256=yfhBSYmuY5k2AccKwiNvG9hCDx1coYyWjq9BBwYaL3Y,1234
|
|
19
|
-
ocrd/cli/resmgr.py,sha256=
|
|
16
|
+
ocrd/cli/resmgr.py,sha256=7hRRi8EryQwakRdZgguee3ercA5_T48BKGWWfgAVfzM,8072
|
|
20
17
|
ocrd/cli/validate.py,sha256=P8jrzAnoU-5TUjLNA7s_ZMY2Krw5Y-SVIZPhdOk25cw,5931
|
|
21
18
|
ocrd/cli/workspace.py,sha256=0UzKN7vvD0n5wwxldzLHOlikDDIyiBiV1PuTOKCnnnE,41279
|
|
22
19
|
ocrd/cli/zip.py,sha256=3HMUbVsPTK3SRuF5oZnCZLjoqXJK-AYpA-rMqenY858,5965
|
|
@@ -26,19 +23,26 @@ ocrd/decorators/mets_find_options.py,sha256=8fiSdk-415o6-iBPB2T9He_v52qE8cTj3cCn
|
|
|
26
23
|
ocrd/decorators/ocrd_cli_options.py,sha256=Bemkq3V3QkOI3nNqGzphaNW7gjU9vNN-M5F2DvxvioM,2479
|
|
27
24
|
ocrd/decorators/parameter_option.py,sha256=TnCIcV9L5oAnI1Ew2TyFzo5FAwiIzWl2pn8oaD9jfEU,1056
|
|
28
25
|
ocrd/processor/__init__.py,sha256=39ymNwYRdc-b_OJzzKmWCvo2ga3KdsGSYDHE1Hzkn_w,274
|
|
29
|
-
ocrd/processor/base.py,sha256=
|
|
26
|
+
ocrd/processor/base.py,sha256=DxBsRn8VLsfNvc9_2BU0KxUv4t9XtHSSu9uiabxn8Nk,59850
|
|
30
27
|
ocrd/processor/helpers.py,sha256=4lR_QvZsxvh7f8_uK9YzdHP5-hvFU4qqYM_Cu_k41KI,10937
|
|
31
28
|
ocrd/processor/ocrd_page_result.py,sha256=qo9pGV4r9S5--NAq5clIJOfs4b1vavoDOTbDqAEAAKA,507
|
|
32
29
|
ocrd/processor/builtin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
30
|
ocrd/processor/builtin/dummy_processor.py,sha256=SmMRtN0w88kBU24654ThT-yf84SFsFW4BOcmwsDDWdc,3533
|
|
34
31
|
ocrd/processor/builtin/filter_processor.py,sha256=9mbMq_XTJa8wrlbNdf46GUMNdjedz-enxafsCrnNhEo,4295
|
|
32
|
+
ocrd/processor/builtin/merge_processor.py,sha256=UvYgB73Y9lzJSMMNaVO6nk3rAcONHvQ-E-XAhaseZno,4655
|
|
33
|
+
ocrd/processor/builtin/param_command_header2unordered.json,sha256=K6xEcDXc3Qsaxt96wdISK22UxHf517O-E8JHryDwAfE,307
|
|
34
|
+
ocrd/processor/builtin/param_command_heading2unordered.json,sha256=nBenDJYlV-POfoM2R6izxcA5pW3rz-czGaoKudj3OTY,309
|
|
35
|
+
ocrd/processor/builtin/param_command_lines2orientation.json,sha256=2cGv5fXqAVAUyP8K1IT6vrSLCGhAtdXAvVxdSfjp8KQ,282
|
|
36
|
+
ocrd/processor/builtin/param_command_page-update-version.json,sha256=4JsXEltEOG89Q8PT8eBxQXnxp3Mez3EsRz4GfshjJEY,267
|
|
37
|
+
ocrd/processor/builtin/param_command_transkribus-to-prima.json,sha256=AvPNNS5uBmh69i6irRhIdHN9gE28yN9ufV7jfpFAeME,472
|
|
38
|
+
ocrd/processor/builtin/shell_processor.py,sha256=aWsB_m7o4ypG1DBAE0sNMFnaw9ptONqchLLl06KgTEo,5888
|
|
35
39
|
ocrd/processor/builtin/dummy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
|
-
ocrd/processor/builtin/dummy/ocrd-tool.json,sha256=
|
|
40
|
+
ocrd/processor/builtin/dummy/ocrd-tool.json,sha256=t_M3HABw7k_Ufi1L9Mr4t3LSCRnu0HH8-fvEs3u2PQY,3487
|
|
37
41
|
ocrd_modelfactory/__init__.py,sha256=sjAwPwDzetvPHdV6nPquHtMdFUBYRmo1P-VKER9YCWM,4404
|
|
38
42
|
ocrd_models/__init__.py,sha256=A0aj0mOraNb-xfiUueACdoaqISnp0qH-F49nTJg2vCs,380
|
|
39
43
|
ocrd_models/constants.py,sha256=R7-jOGabFd8HP0qRWfTMk0RcUmdwN-jhmDVbUW_QfU4,6961
|
|
40
44
|
ocrd_models/mets-empty.xml,sha256=dFixfbxSXrgjZx9BfdIKWHX-khNmp7dNYaFe2qQSwCY,1203
|
|
41
|
-
ocrd_models/ocrd_agent.py,sha256=
|
|
45
|
+
ocrd_models/ocrd_agent.py,sha256=Nm0XDNCmWZ8O3xsXaY-WmEghttXmh90UKmAObCL99IY,5617
|
|
42
46
|
ocrd_models/ocrd_exif.py,sha256=HSLPn_WBDRIlMtKNYilLHm8WjX-b14HgnqT_KfzjS_0,4680
|
|
43
47
|
ocrd_models/ocrd_file.py,sha256=9-mfDb91RVy3p9rKryl-C39P4Of6Rb8OZBuxAee4VrI,9723
|
|
44
48
|
ocrd_models/ocrd_mets.py,sha256=lz9mlDq9A9UmZDoN8lh5XRnBzdAtLLZywDZSbyZPS84,50905
|
|
@@ -48,29 +52,31 @@ ocrd_models/ocrd_xml_base.py,sha256=iOnDl2zBNhN-Q4moLWiFkSqXvfRzxE5wbp5Tjsu1W6A,
|
|
|
48
52
|
ocrd_models/report.py,sha256=CX-t9ZDi2VmAy8M1Azsh83UsvE_f5pMeEC7tPaA-ztU,2021
|
|
49
53
|
ocrd_models/utils.py,sha256=A-H11ZJ65ZjH4DPK9s_Yz6JtA9fbTQ2jY-__9s7Hrg8,2320
|
|
50
54
|
ocrd_models/xpath_functions.py,sha256=VM2f9hl8ja4NrDOEQRSYdx7GewwAxfoyGMDjqjgA_7g,1439
|
|
51
|
-
ocrd_network/__init__.py,sha256=
|
|
55
|
+
ocrd_network/__init__.py,sha256=NWlSgXi7z45ow37AmITxfCB1d-L39rO8ttyxNJ-z8G0,376
|
|
52
56
|
ocrd_network/client.py,sha256=pL-g79cQgulXyGYgLOh--oxl1hZEMu48PTbuvMW1jIE,3007
|
|
53
57
|
ocrd_network/client_utils.py,sha256=Ne1a0fteb-TBuc0EAD6X_fh2RAl4hmPt2oluhpB28iU,5371
|
|
54
|
-
ocrd_network/constants.py,sha256=
|
|
58
|
+
ocrd_network/constants.py,sha256=XyRYjFO38yIBD6s1wsA-z6V16tBmbUw4LXlFkj-tQC8,1943
|
|
55
59
|
ocrd_network/database.py,sha256=-SddvaMLKn0pjdONyvWmjxfPJd6viedAIp6Lj1sU1Zs,10705
|
|
56
|
-
ocrd_network/logging_utils.py,sha256=
|
|
60
|
+
ocrd_network/logging_utils.py,sha256=hXwS46FzY_HTh92DgnxTuARxj8C18bOBmFKVrvBlUgc,2409
|
|
57
61
|
ocrd_network/param_validators.py,sha256=Jl1VwiPPKJ50k-xEHLdvW-1QDOkJHCiMz4k9Ipqm-Uc,1489
|
|
58
62
|
ocrd_network/process_helpers.py,sha256=t2qltUpRefzLwdSGsiUEOGYO4Pz2OH7arpgjmCAeXMU,3086
|
|
59
63
|
ocrd_network/processing_server.py,sha256=z21DvRleEeo0hkpc1-2z0jLKuf5WSipL95MVEns8eJE,38457
|
|
60
64
|
ocrd_network/processing_worker.py,sha256=5AtvIhfcePzltKj4SElh7Aj9zlUOEiMVPTjtXuFSbT8,12659
|
|
65
|
+
ocrd_network/resource_manager_server.py,sha256=Ihz2g9uhkPSJee9GL7485DFC4cORhro4JQI6QzHoUA4,7255
|
|
61
66
|
ocrd_network/server_cache.py,sha256=orfAMw3LwUnduRHFAB6MpfoORTDoPV4ntSdAcQHBOyI,13148
|
|
62
67
|
ocrd_network/server_utils.py,sha256=Lxby62gHvrSbHgpWXvyZGdsWajp2TFzyxjHdMZWBESk,10229
|
|
63
68
|
ocrd_network/tcp_to_uds_mets_proxy.py,sha256=yRW-O6ihd31gf7xqQBIBb_ZQQgqisMyOdRI216ehq_A,3160
|
|
64
69
|
ocrd_network/utils.py,sha256=yE-nV_sv171tPp7weIFOxYw6HJlxvGBmrS8b1rIHS7c,6760
|
|
65
|
-
ocrd_network/cli/__init__.py,sha256=
|
|
70
|
+
ocrd_network/cli/__init__.py,sha256=VBjjXcn-2O5gerqE6UdNfS-EkVFEVPQFHylsn8F9kfY,317
|
|
66
71
|
ocrd_network/cli/client.py,sha256=H5fiJhBqbFn4_B2p3V20GejGTIYO-mNglh3y5nzUGhs,10350
|
|
67
72
|
ocrd_network/cli/processing_server.py,sha256=NsuI0f9h4KDwe39YugmHo5cJ_29chcLLQ7DThKfPO7s,770
|
|
68
73
|
ocrd_network/cli/processing_worker.py,sha256=ZuaCkbKV_WKJV7cGOjZ6RLrjjppymnwNCiznFMlclAg,1897
|
|
74
|
+
ocrd_network/cli/resmgr_server.py,sha256=sc0VX_RehTbg8Qp7ht_DvVqsrdL5b9Zw3bBgWcAD13A,826
|
|
69
75
|
ocrd_network/models/__init__.py,sha256=eVYMZaktzlyHKx-zI7GLYyRlZd3Vi_lNgsqSSFwqb6U,475
|
|
70
|
-
ocrd_network/models/job.py,sha256=
|
|
71
|
-
ocrd_network/models/messages.py,sha256=
|
|
76
|
+
ocrd_network/models/job.py,sha256=9bwp8DFoRH96WnRpkDV3XRfXCBiupzK6WXjqPsTcvLg,4440
|
|
77
|
+
ocrd_network/models/messages.py,sha256=OUDTjUiaATStsSAHCEDilUhBSruPsjpBtIBsllqN2Z0,672
|
|
72
78
|
ocrd_network/models/workflow.py,sha256=GL8q7RX9fGdXG3iVyJpCeLXbWa-2qI_SIxqhzxs9VK8,189
|
|
73
|
-
ocrd_network/models/workspace.py,sha256=
|
|
79
|
+
ocrd_network/models/workspace.py,sha256=rZcBWNlQOZX2KukP79IDRrXJvZ-H5pPH3WpPuw72HBM,1596
|
|
74
80
|
ocrd_network/rabbitmq_utils/__init__.py,sha256=XLIqZhfin4I4m80G9B__UcP45Lz10_mEpMYLXGOByUk,741
|
|
75
81
|
ocrd_network/rabbitmq_utils/connector.py,sha256=N6mzjIf5FkVIno3FI1AksZY4F5jMUAm8baay0nXZx8w,11343
|
|
76
82
|
ocrd_network/rabbitmq_utils/constants.py,sha256=Zu_dKJASfrgnIvEZZlFX9uDR9y6w7zy0KhW7gP7wHDE,1063
|
|
@@ -80,20 +86,20 @@ ocrd_network/rabbitmq_utils/ocrd_messages.py,sha256=wwzfMWbXmOFo_nd32_XySCso91_U
|
|
|
80
86
|
ocrd_network/rabbitmq_utils/publisher.py,sha256=mw4XQQhRE1xUQVgEUseyG845iIgVO-9GdGwNH6nUFms,2433
|
|
81
87
|
ocrd_network/runtime_data/__init__.py,sha256=PnWuuagElbkTzGtPWQEk5wlFtDxqT7B48S0Zrgt8H68,320
|
|
82
88
|
ocrd_network/runtime_data/config_parser.py,sha256=Vr0FbsqmsoiuhDgZ7KFdeFZj9JvUulcOS2PCRFQQNHY,2364
|
|
83
|
-
ocrd_network/runtime_data/connection_clients.py,sha256=
|
|
89
|
+
ocrd_network/runtime_data/connection_clients.py,sha256=HKf_aSfwg11JeH6qiQXnqxbnvNgCAMRnVIpj30k93Bc,4207
|
|
84
90
|
ocrd_network/runtime_data/deployer.py,sha256=j3tcauURZtu7MKcEIE9B5eMCMSYMbxhB8LmtK72Zk1c,5314
|
|
85
|
-
ocrd_network/runtime_data/hosts.py,sha256=
|
|
86
|
-
ocrd_network/runtime_data/network_agents.py,sha256=
|
|
91
|
+
ocrd_network/runtime_data/hosts.py,sha256=P1bLh1NjgL-ajgP-VhGCACvy6rgJ5nZhGKsHaldzatk,8921
|
|
92
|
+
ocrd_network/runtime_data/network_agents.py,sha256=wwN7IJei4UdlyfhjvdmB5TB4O0Gn8icSkArJe-suvAY,4523
|
|
87
93
|
ocrd_network/runtime_data/network_services.py,sha256=5aH3RNGCi1fBuSdRp_Xz0MzyD_FmnvPnaBYAiYY3gp4,7891
|
|
88
|
-
ocrd_utils/__init__.py,sha256=
|
|
94
|
+
ocrd_utils/__init__.py,sha256=Cl_lrZxjXuTZ_me4I_lpaFNTpSdacWhQetOtHdrkUsU,6057
|
|
89
95
|
ocrd_utils/config.py,sha256=Oe8JBGb8r4z274XNWcdMV-GApzxmAYO8hHmbAV5bXf8,12609
|
|
90
|
-
ocrd_utils/constants.py,sha256=
|
|
96
|
+
ocrd_utils/constants.py,sha256=6lqMLeJdkFBlvGVmGjcExWbRKzNU6QT0kADBb5BkcBc,3464
|
|
91
97
|
ocrd_utils/deprecate.py,sha256=luAqGWUSF-9DHmTd2lDiQoQPA5SrJazdoDPQYQ6A7Z4,1029
|
|
92
98
|
ocrd_utils/image.py,sha256=tG5WnNtrrvGjm2-r6NVs1Jm7z8fee3MuLKotAD6C2RU,24818
|
|
93
99
|
ocrd_utils/introspect.py,sha256=LPhgcUuoicQcURDCWlCpSdbfVyxID5vmQPXJ9vzuYV0,1977
|
|
94
|
-
ocrd_utils/logging.py,sha256
|
|
100
|
+
ocrd_utils/logging.py,sha256=-cCi_9kIzmLUixfnDcx2jq9IQuwMqrU-71RJhKOQilQ,7929
|
|
95
101
|
ocrd_utils/ocrd_logging.conf,sha256=JlWmA_5vg6HnjPGjTC4mA5vFHqmnEinwllSTiOw5CCo,3473
|
|
96
|
-
ocrd_utils/os.py,sha256=
|
|
102
|
+
ocrd_utils/os.py,sha256=Fmy-Q4OMGlbHLMH9jhPRAxRN5curyEXrYoWJxztIl1w,13756
|
|
97
103
|
ocrd_utils/str.py,sha256=4P0MdX0LCTqDTnsi_y5wNOBXW_TuTFANF7NYRXjo4x0,10136
|
|
98
104
|
ocrd_validators/__init__.py,sha256=ZFc-UqRVBk9o1YesZFmr9lOepttNJ_NKx1Zdb7g_YsU,972
|
|
99
105
|
ocrd_validators/bagit-profile.yml,sha256=sdQJlSi7TOn1E9WYMOZ1shewJ-i_nPaKmsAFkh28TGY,1011
|
|
@@ -103,7 +109,7 @@ ocrd_validators/message_processing.schema.yml,sha256=HL7o96-7ejslVMXcp16sbo5IjfU
|
|
|
103
109
|
ocrd_validators/message_result.schema.yml,sha256=G6vt_JgIU7OGSaHj-2Jna6KWQ3bFWol5tnBArWEiVjM,681
|
|
104
110
|
ocrd_validators/mets.xsd,sha256=0Wrs9bObn0n-yEEIWyguIcUUuuP6KMEjD4I_p1_UlwY,138290
|
|
105
111
|
ocrd_validators/ocrd_network_message_validator.py,sha256=oafNWOjieBmTHFfYeCtyFFpW1gI0lDT6ycRr5Kvmfq0,561
|
|
106
|
-
ocrd_validators/ocrd_tool.schema.yml,sha256=
|
|
112
|
+
ocrd_validators/ocrd_tool.schema.yml,sha256=fDNr-QdEOBtYbz8aHmjdOUirPBKr3vfLUDtC88gu75U,10231
|
|
107
113
|
ocrd_validators/ocrd_tool_validator.py,sha256=0DWuyyOSbdbrrQ5kEfWZv_qp5rSmLzmFMUKcPGfCBgM,749
|
|
108
114
|
ocrd_validators/ocrd_zip_validator.py,sha256=t-cYIZ5llZSQ2EspFzm0m-FajkLRfAFTISmXe27wMtA,3720
|
|
109
115
|
ocrd_validators/page.xsd,sha256=abQ8C3gRLPMFm8lH62aTCfvTIWI23TpgEDcaW9YCt7I,85770
|
|
@@ -117,9 +123,9 @@ ocrd_validators/xlink.xsd,sha256=8fW7YAMWXN2PbB_MMvj9H5ZeFoEBDzuYBtlGC8_6ijw,318
|
|
|
117
123
|
ocrd_validators/xsd_mets_validator.py,sha256=YgiuNtwNDtn3LuvdFFscnmsGREF_wQ4wtA76yE2Iljw,469
|
|
118
124
|
ocrd_validators/xsd_page_validator.py,sha256=ggt-nmaz-DDyAPwm3ZMVvtChuV2BJ2ZEEbWpePL9vTk,469
|
|
119
125
|
ocrd_validators/xsd_validator.py,sha256=ahJo_oVvTK_JB0Cu4CkMC8l_gbzsyW91AxGtelMjqrg,2115
|
|
120
|
-
ocrd-3.6.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
121
|
-
ocrd-3.
|
|
122
|
-
ocrd-3.6.0.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
123
|
-
ocrd-3.
|
|
124
|
-
ocrd-3.6.0.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
|
|
125
|
-
ocrd-3.6.0.dist-info/RECORD,,
|
|
126
|
+
ocrd-3.8.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
127
|
+
ocrd-3.8.0.dist-info/METADATA,sha256=ieqTzchmz7OXQV6vlize7XzWDzWEk5ypLXtn4VTduHY,11396
|
|
128
|
+
ocrd-3.8.0.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
129
|
+
ocrd-3.8.0.dist-info/entry_points.txt,sha256=CI-NoDR1BYmsuAsJmPAn4NrN9guzdedHGUbC8QSmdGs,266
|
|
130
|
+
ocrd-3.8.0.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
|
|
131
|
+
ocrd-3.8.0.dist-info/RECORD,,
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
[console_scripts]
|
|
2
2
|
ocrd = ocrd.cli:cli
|
|
3
|
+
ocrd-command = ocrd.processor.builtin.shell_processor:cli
|
|
3
4
|
ocrd-dummy = ocrd.processor.builtin.dummy_processor:cli
|
|
4
5
|
ocrd-filter = ocrd.processor.builtin.filter_processor:cli
|
|
6
|
+
ocrd-merge = ocrd.processor.builtin.merge_processor:cli
|
ocrd_models/ocrd_agent.py
CHANGED
|
@@ -43,7 +43,7 @@ class OcrdAgent():
|
|
|
43
43
|
self.otherrole = otherrole
|
|
44
44
|
self.notes = notes
|
|
45
45
|
|
|
46
|
-
def
|
|
46
|
+
def __repr__(self):
|
|
47
47
|
"""
|
|
48
48
|
String representation
|
|
49
49
|
"""
|
|
@@ -142,7 +142,7 @@ class OcrdAgent():
|
|
|
142
142
|
"""
|
|
143
143
|
el_notes = self._el.findall(TAG_METS_NOTE)
|
|
144
144
|
if el_notes is not None:
|
|
145
|
-
return [(note.attrib, note.text)
|
|
145
|
+
return [(dict(note.attrib), note.text)
|
|
146
146
|
for note in el_notes]
|
|
147
147
|
|
|
148
148
|
@notes.setter
|
|
@@ -190,7 +190,7 @@ class ClientSideOcrdAgent():
|
|
|
190
190
|
self.otherrole = otherrole
|
|
191
191
|
self.notes = notes
|
|
192
192
|
|
|
193
|
-
def
|
|
193
|
+
def __repr__(self):
|
|
194
194
|
props = ', '.join([
|
|
195
195
|
'='.join([k, getattr(self, k) if getattr(self, k) else '---'])
|
|
196
196
|
for k in ['type', 'othertype', 'role', 'otherrole', 'name']
|
ocrd_network/__init__.py
CHANGED
|
@@ -3,4 +3,5 @@ from .constants import JobState
|
|
|
3
3
|
from .processing_server import ProcessingServer
|
|
4
4
|
from .processing_worker import ProcessingWorker
|
|
5
5
|
from .param_validators import DatabaseParamType, ServerAddressParamType, QueueServerParamType
|
|
6
|
+
from .resource_manager_server import ResourceManagerServer
|
|
6
7
|
from .server_cache import CacheLockedPages, CacheProcessingRequests
|
ocrd_network/cli/__init__.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
from .client import client_cli
|
|
2
2
|
from .processing_server import processing_server_cli
|
|
3
3
|
from .processing_worker import processing_worker_cli
|
|
4
|
+
from .resmgr_server import resource_manager_server_cli
|
|
4
5
|
|
|
5
6
|
__all__ = [
|
|
6
7
|
'client_cli',
|
|
7
8
|
'processing_server_cli',
|
|
8
9
|
'processing_worker_cli',
|
|
10
|
+
'resource_manager_server_cli'
|
|
9
11
|
]
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import click
|
|
2
|
+
from ocrd_network import ResourceManagerServer, ServerAddressParamType
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@click.command('resmgr-server')
|
|
6
|
+
@click.option('-a', '--address',
|
|
7
|
+
help='The URL of the OCR-D resource manager server, format: host:port',
|
|
8
|
+
type=ServerAddressParamType(),
|
|
9
|
+
required=True)
|
|
10
|
+
def resource_manager_server_cli(address: str):
|
|
11
|
+
"""
|
|
12
|
+
Start standalone REST API OCR-D Resource Manager Server
|
|
13
|
+
"""
|
|
14
|
+
try:
|
|
15
|
+
# Note, the address is already validated with the type field
|
|
16
|
+
host, port = address.split(':')
|
|
17
|
+
resource_manager_server = ResourceManagerServer(
|
|
18
|
+
host = host,
|
|
19
|
+
port = int(port)
|
|
20
|
+
)
|
|
21
|
+
resource_manager_server.start()
|
|
22
|
+
except Exception as e:
|
|
23
|
+
raise Exception("OCR-D Resource Manager Server has failed with error") from e
|
ocrd_network/constants.py
CHANGED
|
@@ -10,6 +10,8 @@ OCRD_ALL_TOOL_JSON = "ocrd-all-tool.json"
|
|
|
10
10
|
# Used as a placeholder to lock all pages when no page_id is specified
|
|
11
11
|
SERVER_ALL_PAGES_PLACEHOLDER = "all_pages"
|
|
12
12
|
|
|
13
|
+
# TODO: Make this more configurable
|
|
14
|
+
RESOURCE_MANAGER_SERVER_PORT = 45555
|
|
13
15
|
|
|
14
16
|
class StrEnum(str, Enum):
|
|
15
17
|
def __str__(self):
|
|
@@ -48,6 +50,7 @@ class NetworkLoggingDirs(StrEnum):
|
|
|
48
50
|
PROCESSING_JOBS = "processing_jobs"
|
|
49
51
|
PROCESSING_SERVERS = "processing_servers"
|
|
50
52
|
PROCESSING_WORKERS = "processing_workers"
|
|
53
|
+
RESOURCE_MANAGER_SERVERS = "resource_manager_servers"
|
|
51
54
|
|
|
52
55
|
|
|
53
56
|
class ServerApiTags(StrEnum):
|
ocrd_network/logging_utils.py
CHANGED
|
@@ -56,3 +56,8 @@ def get_processing_server_logging_file_path(pid: int) -> Path:
|
|
|
56
56
|
def get_processing_worker_logging_file_path(processor_name: str, pid: int) -> Path:
|
|
57
57
|
log_file: str = f"worker.{pid}.{processor_name}.log"
|
|
58
58
|
return Path(get_root_logging_dir(NetworkLoggingDirs.PROCESSING_WORKERS), log_file)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def get_resource_manager_server_logging_file_path(pid: int) -> Path:
|
|
62
|
+
log_file: str = f"resource_manager_server.{pid}.log"
|
|
63
|
+
return Path(get_root_logging_dir(NetworkLoggingDirs.RESOURCE_MANAGER_SERVERS), log_file)
|
ocrd_network/models/job.py
CHANGED
|
@@ -13,7 +13,7 @@ class PYJobInput(BaseModel):
|
|
|
13
13
|
workspace_id: Optional[str] = None
|
|
14
14
|
description: Optional[str] = None
|
|
15
15
|
input_file_grps: List[str]
|
|
16
|
-
output_file_grps: Optional[List[str]]
|
|
16
|
+
output_file_grps: Optional[List[str]] = None
|
|
17
17
|
page_id: Optional[str] = None
|
|
18
18
|
parameters: dict = {} # Always set to empty dict when None, otherwise it fails ocr-d-validation
|
|
19
19
|
result_queue_name: Optional[str] = None
|
|
@@ -23,8 +23,8 @@ class PYJobInput(BaseModel):
|
|
|
23
23
|
# If set, specifies a list of job ids this job depends on
|
|
24
24
|
depends_on: Optional[List[str]] = None
|
|
25
25
|
|
|
26
|
-
|
|
27
|
-
|
|
26
|
+
model_config = {
|
|
27
|
+
'json_schema_extra': {
|
|
28
28
|
'example': {
|
|
29
29
|
'path_to_mets': '/path/to/mets.xml',
|
|
30
30
|
'description': 'The description of this execution',
|
|
@@ -34,6 +34,7 @@ class PYJobInput(BaseModel):
|
|
|
34
34
|
'parameters': {}
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
|
+
}
|
|
37
38
|
|
|
38
39
|
|
|
39
40
|
class PYJobOutput(BaseModel):
|
|
@@ -42,12 +43,12 @@ class PYJobOutput(BaseModel):
|
|
|
42
43
|
job_id: str
|
|
43
44
|
processor_name: str
|
|
44
45
|
state: JobState = JobState.unset
|
|
45
|
-
path_to_mets: Optional[str]
|
|
46
|
-
workspace_id: Optional[str]
|
|
46
|
+
path_to_mets: Optional[str] = None
|
|
47
|
+
workspace_id: Optional[str] = None
|
|
47
48
|
input_file_grps: List[str]
|
|
48
|
-
output_file_grps: Optional[List[str]]
|
|
49
|
+
output_file_grps: Optional[List[str]] = None
|
|
49
50
|
page_id: Optional[str] = None
|
|
50
|
-
log_file_path: Optional[str]
|
|
51
|
+
log_file_path: Optional[str] = None
|
|
51
52
|
|
|
52
53
|
|
|
53
54
|
class DBProcessorJob(Document):
|
|
@@ -55,22 +56,22 @@ class DBProcessorJob(Document):
|
|
|
55
56
|
"""
|
|
56
57
|
job_id: str
|
|
57
58
|
processor_name: str
|
|
58
|
-
path_to_mets: Optional[str]
|
|
59
|
-
workspace_id: Optional[str]
|
|
60
|
-
description: Optional[str]
|
|
59
|
+
path_to_mets: Optional[str] = None
|
|
60
|
+
workspace_id: Optional[str] = None
|
|
61
|
+
description: Optional[str] = None
|
|
61
62
|
state: JobState = JobState.unset
|
|
62
63
|
input_file_grps: List[str]
|
|
63
|
-
output_file_grps: Optional[List[str]]
|
|
64
|
-
page_id: Optional[str]
|
|
65
|
-
parameters: Optional[dict]
|
|
66
|
-
depends_on: Optional[List[str]]
|
|
67
|
-
result_queue_name: Optional[str]
|
|
68
|
-
callback_url: Optional[str]
|
|
69
|
-
internal_callback_url: Optional[str]
|
|
70
|
-
start_time: Optional[datetime]
|
|
71
|
-
end_time: Optional[datetime]
|
|
72
|
-
exec_time: Optional[str]
|
|
73
|
-
log_file_path: Optional[str]
|
|
64
|
+
output_file_grps: Optional[List[str]] = None
|
|
65
|
+
page_id: Optional[str] = None
|
|
66
|
+
parameters: Optional[dict] = None
|
|
67
|
+
depends_on: Optional[List[str]] = None
|
|
68
|
+
result_queue_name: Optional[str] = None
|
|
69
|
+
callback_url: Optional[str] = None
|
|
70
|
+
internal_callback_url: Optional[str] = None
|
|
71
|
+
start_time: Optional[datetime] = None
|
|
72
|
+
end_time: Optional[datetime] = None
|
|
73
|
+
exec_time: Optional[str] = None
|
|
74
|
+
log_file_path: Optional[str] = None
|
|
74
75
|
|
|
75
76
|
class Settings:
|
|
76
77
|
use_enum_values = True
|
|
@@ -99,9 +100,9 @@ class PYWorkflowJobOutput(BaseModel):
|
|
|
99
100
|
page_id: str
|
|
100
101
|
page_wise: bool = False
|
|
101
102
|
job_id: str
|
|
102
|
-
path_to_mets: Optional[str]
|
|
103
|
-
workspace_id: Optional[str]
|
|
104
|
-
description: Optional[str]
|
|
103
|
+
path_to_mets: Optional[str] = None
|
|
104
|
+
workspace_id: Optional[str] = None
|
|
105
|
+
description: Optional[str] = None
|
|
105
106
|
|
|
106
107
|
|
|
107
108
|
class DBWorkflowJob(Document):
|
|
@@ -114,10 +115,10 @@ class DBWorkflowJob(Document):
|
|
|
114
115
|
# key: page_id
|
|
115
116
|
# value: List of processing job ids sorted in dependency order
|
|
116
117
|
processing_job_ids: Dict
|
|
117
|
-
path_to_mets: Optional[str]
|
|
118
|
-
workspace_id: Optional[str]
|
|
119
|
-
description: Optional[str]
|
|
120
|
-
workflow_callback_url: Optional[str]
|
|
118
|
+
path_to_mets: Optional[str] = None
|
|
119
|
+
workspace_id: Optional[str] = None
|
|
120
|
+
description: Optional[str] = None
|
|
121
|
+
workflow_callback_url: Optional[str] = None
|
|
121
122
|
|
|
122
123
|
class Settings:
|
|
123
124
|
use_enum_values = True
|
ocrd_network/models/messages.py
CHANGED
|
@@ -11,8 +11,8 @@ class PYResultMessage(BaseModel):
|
|
|
11
11
|
path_to_mets: Optional[str] = None
|
|
12
12
|
workspace_id: Optional[str] = None
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
|
|
14
|
+
model_config = {
|
|
15
|
+
"json_schema_extra": {
|
|
16
16
|
"example": {
|
|
17
17
|
"job_id": "d8e36726-ed28-5476-b83c-bc31d2eecf1f",
|
|
18
18
|
"state": JobState.success,
|
|
@@ -20,3 +20,4 @@ class PYResultMessage(BaseModel):
|
|
|
20
20
|
"workspace_id": "c7f25615-fc17-4365-a74d-ad20e1ddbd0e"
|
|
21
21
|
}
|
|
22
22
|
}
|
|
23
|
+
}
|
ocrd_network/models/workspace.py
CHANGED
|
@@ -25,10 +25,10 @@ class DBWorkspace(Document):
|
|
|
25
25
|
workspace_mets_path: str
|
|
26
26
|
ocrd_identifier: str
|
|
27
27
|
bagit_profile_identifier: str
|
|
28
|
-
ocrd_base_version_checksum: Optional[str]
|
|
29
|
-
ocrd_mets: Optional[str]
|
|
30
|
-
bag_info_adds: Optional[dict]
|
|
31
|
-
mets_server_url: Optional[str]
|
|
28
|
+
ocrd_base_version_checksum: Optional[str] = None
|
|
29
|
+
ocrd_mets: Optional[str] = None
|
|
30
|
+
bag_info_adds: Optional[dict] = None
|
|
31
|
+
mets_server_url: Optional[str] = None
|
|
32
32
|
deleted: bool = False
|
|
33
33
|
|
|
34
34
|
class Settings:
|