ocrd 3.6.0__py3-none-any.whl → 3.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. ocrd/cli/__init__.py +2 -4
  2. ocrd/cli/bashlib.py +6 -117
  3. ocrd/cli/network.py +2 -0
  4. ocrd/cli/resmgr.py +29 -65
  5. ocrd/constants.py +0 -2
  6. ocrd/mets_server.py +5 -5
  7. ocrd/processor/base.py +6 -16
  8. ocrd/processor/builtin/dummy/ocrd-tool.json +25 -0
  9. ocrd/processor/builtin/merge_processor.py +131 -0
  10. ocrd/processor/builtin/param_command_header2unordered.json +7 -0
  11. ocrd/processor/builtin/param_command_heading2unordered.json +7 -0
  12. ocrd/processor/builtin/param_command_lines2orientation.json +6 -0
  13. ocrd/processor/builtin/param_command_page-update-version.json +5 -0
  14. ocrd/processor/builtin/param_command_transkribus-to-prima.json +8 -0
  15. ocrd/processor/builtin/shell_processor.py +128 -0
  16. ocrd/resource_manager.py +213 -124
  17. {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/METADATA +23 -10
  18. {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/RECORD +40 -34
  19. {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/entry_points.txt +2 -0
  20. ocrd_models/ocrd_agent.py +3 -3
  21. ocrd_network/__init__.py +1 -0
  22. ocrd_network/cli/__init__.py +2 -0
  23. ocrd_network/cli/resmgr_server.py +23 -0
  24. ocrd_network/constants.py +3 -0
  25. ocrd_network/logging_utils.py +5 -0
  26. ocrd_network/models/job.py +29 -28
  27. ocrd_network/models/messages.py +3 -2
  28. ocrd_network/models/workspace.py +4 -4
  29. ocrd_network/resource_manager_server.py +182 -0
  30. ocrd_network/runtime_data/connection_clients.py +1 -1
  31. ocrd_network/runtime_data/hosts.py +43 -16
  32. ocrd_network/runtime_data/network_agents.py +15 -1
  33. ocrd_utils/__init__.py +5 -1
  34. ocrd_utils/constants.py +5 -0
  35. ocrd_utils/logging.py +3 -0
  36. ocrd_utils/os.py +142 -62
  37. ocrd_validators/ocrd_tool.schema.yml +7 -4
  38. ocrd/cli/log.py +0 -56
  39. ocrd/lib.bash +0 -310
  40. ocrd/resource_list.yml +0 -61
  41. {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/LICENSE +0 -0
  42. {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/WHEEL +0 -0
  43. {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocrd
3
- Version: 3.6.0
3
+ Version: 3.8.0
4
4
  Summary: OCR-D framework
5
5
  Author-email: Konstantin Baierer <unixprog@gmail.com>
6
6
  License: Apache License 2.0
@@ -16,12 +16,13 @@ Requires-Dist: beanie~=1.7
16
16
  Requires-Dist: click>=7
17
17
  Requires-Dist: cryptography<43.0.0
18
18
  Requires-Dist: Deprecated==1.2.0
19
- Requires-Dist: docker
19
+ Requires-Dist: docker>=7.1.0
20
20
  Requires-Dist: elementpath
21
21
  Requires-Dist: fastapi>=0.78.0
22
22
  Requires-Dist: filetype
23
23
  Requires-Dist: Flask
24
24
  Requires-Dist: frozendict>=2.4.0
25
+ Requires-Dist: gitpython
25
26
  Requires-Dist: gdown
26
27
  Requires-Dist: httpx>=0.22.0
27
28
  Requires-Dist: jsonschema>=4
@@ -35,7 +36,7 @@ Requires-Dist: opencv-python-headless
35
36
  Requires-Dist: paramiko
36
37
  Requires-Dist: pika>=1.2.0
37
38
  Requires-Dist: Pillow>=7.2.0
38
- Requires-Dist: pydantic==1.*
39
+ Requires-Dist: pydantic>=2.0.0
39
40
  Requires-Dist: python-magic
40
41
  Requires-Dist: python-multipart
41
42
  Requires-Dist: pyyaml
@@ -68,6 +69,9 @@ Requires-Dist: shapely>=2; python_version >= "3.9"
68
69
  * [Command line tools](#command-line-tools)
69
70
  * [`ocrd` CLI](#ocrd-cli)
70
71
  * [`ocrd-dummy` CLI](#ocrd-dummy-cli)
72
+ * [`ocrd-filter` CLI](#ocrd-filter-cli)
73
+ * [`ocrd-command` CLI](#ocrd-command-cli)
74
+ * [`ocrd-merge` CLI](#ocrd-merge-cli)
71
75
  * [Configuration](#configuration)
72
76
  * [Packages](#packages)
73
77
  * [ocrd_utils](#ocrd_utils)
@@ -76,7 +80,6 @@ Requires-Dist: shapely>=2; python_version >= "3.9"
76
80
  * [ocrd_validators](#ocrd_validators)
77
81
  * [ocrd_network](#ocrd_network)
78
82
  * [ocrd](#ocrd)
79
- * [bash library](#bash-library)
80
83
  * [Testing](#testing)
81
84
  * [See Also](#see-also)
82
85
 
@@ -121,6 +124,22 @@ supported flags, options and arguments.
121
124
 
122
125
  A minimal [OCR-D processor](https://ocr-d.de/en/user_guide#using-the-ocr-d-processors) that copies from `-I/--input-file-grp` to `-O/--output-file-grp`
123
126
 
127
+ ### `ocrd-filter` CLI
128
+
129
+ A simple [OCR-D processor](https://ocr-d.de/en/user_guide#using-the-ocr-d-processors) that removes segments in PAGE-XML files from `-I/--input-file-grp` to `-O/--output-file-grp` with arbitrary selection based on powerful XPath 2.0 expressions.
130
+
131
+ ### `ocrd-command` CLI
132
+
133
+ A simple [OCR-D processor](https://ocr-d.de/en/user_guide#using-the-ocr-d-processors) that runs arbitrary shell commands to transform PAGE-XML files from `-I/--input-file-grp` to `-O/--output-file-grp` (in effect "wrapping" them for OCR-D).
134
+
135
+ ### `ocrd-merge` CLI
136
+
137
+ A simple [OCR-D processor](https://ocr-d.de/en/user_guide#using-the-ocr-d-processors) that (for every page) joins PAGE-XML files from multiple `-I/--input-file-grp` into a single `-O/--output-file-grp`, ensuring that
138
+ - `Border` polygons are joined
139
+ - all regions are concatenated, while
140
+ - ensuring segment identifiers do not clash,
141
+ - and the reading order simply gets concatenated.
142
+
124
143
  ## Configuration
125
144
 
126
145
  Almost all behaviour of the OCR-D/core software is configured via CLI options and flags, which can be listed with the `--help` flag that all CLIs support.
@@ -220,12 +239,6 @@ Also contains the command line tool `ocrd`.
220
239
 
221
240
  See [README for `ocrd`](./README_ocrd.md) for further information.
222
241
 
223
- ## bash library
224
-
225
- Builds a bash script that can be sourced by other bash scripts to create OCRD-compliant CLI.
226
-
227
- See [README for `bashlib`](./README_bashlib.md) for further information.
228
-
229
242
  ## Testing
230
243
 
231
244
  Download assets (`make assets`)
@@ -1,22 +1,19 @@
1
1
  ocrd/__init__.py,sha256=ZswMVmlqFhAEIzMR3my6IKPq9XLH21aDPC_m_8Jh4dA,1076
2
- ocrd/constants.py,sha256=6dn3mG54WqHsKInmLZp4kJjNqqPtBoFoSuLUuRbOps0,740
3
- ocrd/lib.bash,sha256=Eu-_Eb3uUNEoa3GmSDyUm_4FmmSJctglPOWz8EcBJ2c,10395
4
- ocrd/mets_server.py,sha256=eXIbSip6gYi5RyJZlriIrR9lUGpJjL0kXq3UDZPeqVs,22274
2
+ ocrd/constants.py,sha256=REPY-y28MMsrTWBNB4oOsvX3W06Xr2fvtv9wuWH9oAI,633
3
+ ocrd/mets_server.py,sha256=LbZ0U2_o0W7cWO639U7E816dXabro8-8yHGX0quvHn4,22304
5
4
  ocrd/ocrd-all-tool.json,sha256=EYXmMzP68p3KzL8nUZ16TCX2chQzKkAeISvuXqI_yIw,2094
6
5
  ocrd/resolver.py,sha256=7uwHRxaK8YMdKHe_a2dfrcNwL6UhQRJRVBrIX7GST7Q,15443
7
- ocrd/resource_list.yml,sha256=82-PiqkZnka1kTj3MQqNn4wXWKHHtoFchsQuetWuqFs,2633
8
- ocrd/resource_manager.py,sha256=kIWDoKxWH4IJE1gcoTcCRQjYjieCqiQclyuyF6Y9b8A,16813
6
+ ocrd/resource_manager.py,sha256=2wo3JSCYE1oA0VgI8H901IsC-fnx6vRJ5qSMFgYNorE,20664
9
7
  ocrd/task_sequence.py,sha256=r4e4iaP9AXzTL2xQZpfYnHuFXty5pE-ym3gIyUz1aJc,7180
10
8
  ocrd/workspace.py,sha256=UL_gX0KA-MmpayBl9KGYTfcl-1Canj8S991G9RHhu70,65216
11
9
  ocrd/workspace_backup.py,sha256=aUOnYeJ-nWu-Zve27B0cYd9ZtBkmQX4F4Wim2UcrR5I,3624
12
10
  ocrd/workspace_bagger.py,sha256=4viSQoWteW0V4B_blB6asJXd4-qniGGJyCPfKnrsyrY,12054
13
- ocrd/cli/__init__.py,sha256=klgd85WNdLzqivbkEbt5cMOFvEoMoEE2KnT_oKAwTBs,2888
14
- ocrd/cli/bashlib.py,sha256=RLp6ejgbxLjGfNmeniVTuzLtrGeJrGJAZYc9JPT7U68,5998
15
- ocrd/cli/log.py,sha256=jRgxdoJS14OPXI9LpovPqB3JcxlcGZH7QOde6-TmykQ,1566
16
- ocrd/cli/network.py,sha256=iQ0AhQRGvIFyJY9RBArUiA_wuz7IfNKvU4L8KpVggnY,530
11
+ ocrd/cli/__init__.py,sha256=-n2jpGBZs_OMpI31E7CljGVdoFxDhgCAYwibcl_vp1Q,2838
12
+ ocrd/cli/bashlib.py,sha256=sEpTKbqM5DEo6838Ki5aFU8QsokA2SqQ841gcBu7M5M,1148
13
+ ocrd/cli/network.py,sha256=HA-JeyedsZksTQzuDoSBwyxEHIyIlc7oCmvNNDMA4vA,615
17
14
  ocrd/cli/ocrd_tool.py,sha256=kB3Y3tj7Fpz6Ts4KgVlznhXpAx8gCDvJTnO39j8SGL4,7679
18
15
  ocrd/cli/process.py,sha256=yfhBSYmuY5k2AccKwiNvG9hCDx1coYyWjq9BBwYaL3Y,1234
19
- ocrd/cli/resmgr.py,sha256=b1TMZ3D0d50RR9XjQdPil3sfaoTVAso8LjHXZ9P7WfI,10109
16
+ ocrd/cli/resmgr.py,sha256=7hRRi8EryQwakRdZgguee3ercA5_T48BKGWWfgAVfzM,8072
20
17
  ocrd/cli/validate.py,sha256=P8jrzAnoU-5TUjLNA7s_ZMY2Krw5Y-SVIZPhdOk25cw,5931
21
18
  ocrd/cli/workspace.py,sha256=0UzKN7vvD0n5wwxldzLHOlikDDIyiBiV1PuTOKCnnnE,41279
22
19
  ocrd/cli/zip.py,sha256=3HMUbVsPTK3SRuF5oZnCZLjoqXJK-AYpA-rMqenY858,5965
@@ -26,19 +23,26 @@ ocrd/decorators/mets_find_options.py,sha256=8fiSdk-415o6-iBPB2T9He_v52qE8cTj3cCn
26
23
  ocrd/decorators/ocrd_cli_options.py,sha256=Bemkq3V3QkOI3nNqGzphaNW7gjU9vNN-M5F2DvxvioM,2479
27
24
  ocrd/decorators/parameter_option.py,sha256=TnCIcV9L5oAnI1Ew2TyFzo5FAwiIzWl2pn8oaD9jfEU,1056
28
25
  ocrd/processor/__init__.py,sha256=39ymNwYRdc-b_OJzzKmWCvo2ga3KdsGSYDHE1Hzkn_w,274
29
- ocrd/processor/base.py,sha256=TOUy_s14aM-09HSSzihgCeTOSHRpTO4U4bze7QUSDwg,60382
26
+ ocrd/processor/base.py,sha256=DxBsRn8VLsfNvc9_2BU0KxUv4t9XtHSSu9uiabxn8Nk,59850
30
27
  ocrd/processor/helpers.py,sha256=4lR_QvZsxvh7f8_uK9YzdHP5-hvFU4qqYM_Cu_k41KI,10937
31
28
  ocrd/processor/ocrd_page_result.py,sha256=qo9pGV4r9S5--NAq5clIJOfs4b1vavoDOTbDqAEAAKA,507
32
29
  ocrd/processor/builtin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
30
  ocrd/processor/builtin/dummy_processor.py,sha256=SmMRtN0w88kBU24654ThT-yf84SFsFW4BOcmwsDDWdc,3533
34
31
  ocrd/processor/builtin/filter_processor.py,sha256=9mbMq_XTJa8wrlbNdf46GUMNdjedz-enxafsCrnNhEo,4295
32
+ ocrd/processor/builtin/merge_processor.py,sha256=UvYgB73Y9lzJSMMNaVO6nk3rAcONHvQ-E-XAhaseZno,4655
33
+ ocrd/processor/builtin/param_command_header2unordered.json,sha256=K6xEcDXc3Qsaxt96wdISK22UxHf517O-E8JHryDwAfE,307
34
+ ocrd/processor/builtin/param_command_heading2unordered.json,sha256=nBenDJYlV-POfoM2R6izxcA5pW3rz-czGaoKudj3OTY,309
35
+ ocrd/processor/builtin/param_command_lines2orientation.json,sha256=2cGv5fXqAVAUyP8K1IT6vrSLCGhAtdXAvVxdSfjp8KQ,282
36
+ ocrd/processor/builtin/param_command_page-update-version.json,sha256=4JsXEltEOG89Q8PT8eBxQXnxp3Mez3EsRz4GfshjJEY,267
37
+ ocrd/processor/builtin/param_command_transkribus-to-prima.json,sha256=AvPNNS5uBmh69i6irRhIdHN9gE28yN9ufV7jfpFAeME,472
38
+ ocrd/processor/builtin/shell_processor.py,sha256=aWsB_m7o4ypG1DBAE0sNMFnaw9ptONqchLLl06KgTEo,5888
35
39
  ocrd/processor/builtin/dummy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
- ocrd/processor/builtin/dummy/ocrd-tool.json,sha256=NgMAXN1AQpGk4Ss73ThDY4QyFPKhj54qcrdeCGwTb10,2339
40
+ ocrd/processor/builtin/dummy/ocrd-tool.json,sha256=t_M3HABw7k_Ufi1L9Mr4t3LSCRnu0HH8-fvEs3u2PQY,3487
37
41
  ocrd_modelfactory/__init__.py,sha256=sjAwPwDzetvPHdV6nPquHtMdFUBYRmo1P-VKER9YCWM,4404
38
42
  ocrd_models/__init__.py,sha256=A0aj0mOraNb-xfiUueACdoaqISnp0qH-F49nTJg2vCs,380
39
43
  ocrd_models/constants.py,sha256=R7-jOGabFd8HP0qRWfTMk0RcUmdwN-jhmDVbUW_QfU4,6961
40
44
  ocrd_models/mets-empty.xml,sha256=dFixfbxSXrgjZx9BfdIKWHX-khNmp7dNYaFe2qQSwCY,1203
41
- ocrd_models/ocrd_agent.py,sha256=fY3BQE7otU9KLHaKC9L7BjuU1PIXXTy17V-OAW8LzjI,5609
45
+ ocrd_models/ocrd_agent.py,sha256=Nm0XDNCmWZ8O3xsXaY-WmEghttXmh90UKmAObCL99IY,5617
42
46
  ocrd_models/ocrd_exif.py,sha256=HSLPn_WBDRIlMtKNYilLHm8WjX-b14HgnqT_KfzjS_0,4680
43
47
  ocrd_models/ocrd_file.py,sha256=9-mfDb91RVy3p9rKryl-C39P4Of6Rb8OZBuxAee4VrI,9723
44
48
  ocrd_models/ocrd_mets.py,sha256=lz9mlDq9A9UmZDoN8lh5XRnBzdAtLLZywDZSbyZPS84,50905
@@ -48,29 +52,31 @@ ocrd_models/ocrd_xml_base.py,sha256=iOnDl2zBNhN-Q4moLWiFkSqXvfRzxE5wbp5Tjsu1W6A,
48
52
  ocrd_models/report.py,sha256=CX-t9ZDi2VmAy8M1Azsh83UsvE_f5pMeEC7tPaA-ztU,2021
49
53
  ocrd_models/utils.py,sha256=A-H11ZJ65ZjH4DPK9s_Yz6JtA9fbTQ2jY-__9s7Hrg8,2320
50
54
  ocrd_models/xpath_functions.py,sha256=VM2f9hl8ja4NrDOEQRSYdx7GewwAxfoyGMDjqjgA_7g,1439
51
- ocrd_network/__init__.py,sha256=w6uj3joSsg5NAzQPDsHkuwVjHXukO_JrvcAX07cBlHA,317
55
+ ocrd_network/__init__.py,sha256=NWlSgXi7z45ow37AmITxfCB1d-L39rO8ttyxNJ-z8G0,376
52
56
  ocrd_network/client.py,sha256=pL-g79cQgulXyGYgLOh--oxl1hZEMu48PTbuvMW1jIE,3007
53
57
  ocrd_network/client_utils.py,sha256=Ne1a0fteb-TBuc0EAD6X_fh2RAl4hmPt2oluhpB28iU,5371
54
- ocrd_network/constants.py,sha256=iLHPX5H3g6QKhqIPXCNRILVG7eFUIWuuiBWKp0_dBLw,1812
58
+ ocrd_network/constants.py,sha256=XyRYjFO38yIBD6s1wsA-z6V16tBmbUw4LXlFkj-tQC8,1943
55
59
  ocrd_network/database.py,sha256=-SddvaMLKn0pjdONyvWmjxfPJd6viedAIp6Lj1sU1Zs,10705
56
- ocrd_network/logging_utils.py,sha256=Ez5f_BRp0SiW2sP71yTT7zOJmuMzHYPLNWObq5fBLCw,2188
60
+ ocrd_network/logging_utils.py,sha256=hXwS46FzY_HTh92DgnxTuARxj8C18bOBmFKVrvBlUgc,2409
57
61
  ocrd_network/param_validators.py,sha256=Jl1VwiPPKJ50k-xEHLdvW-1QDOkJHCiMz4k9Ipqm-Uc,1489
58
62
  ocrd_network/process_helpers.py,sha256=t2qltUpRefzLwdSGsiUEOGYO4Pz2OH7arpgjmCAeXMU,3086
59
63
  ocrd_network/processing_server.py,sha256=z21DvRleEeo0hkpc1-2z0jLKuf5WSipL95MVEns8eJE,38457
60
64
  ocrd_network/processing_worker.py,sha256=5AtvIhfcePzltKj4SElh7Aj9zlUOEiMVPTjtXuFSbT8,12659
65
+ ocrd_network/resource_manager_server.py,sha256=Ihz2g9uhkPSJee9GL7485DFC4cORhro4JQI6QzHoUA4,7255
61
66
  ocrd_network/server_cache.py,sha256=orfAMw3LwUnduRHFAB6MpfoORTDoPV4ntSdAcQHBOyI,13148
62
67
  ocrd_network/server_utils.py,sha256=Lxby62gHvrSbHgpWXvyZGdsWajp2TFzyxjHdMZWBESk,10229
63
68
  ocrd_network/tcp_to_uds_mets_proxy.py,sha256=yRW-O6ihd31gf7xqQBIBb_ZQQgqisMyOdRI216ehq_A,3160
64
69
  ocrd_network/utils.py,sha256=yE-nV_sv171tPp7weIFOxYw6HJlxvGBmrS8b1rIHS7c,6760
65
- ocrd_network/cli/__init__.py,sha256=7dzZLbGHaqMGToNZhll-q87Jh4UP6NSuvxbVuUOIkqY,228
70
+ ocrd_network/cli/__init__.py,sha256=VBjjXcn-2O5gerqE6UdNfS-EkVFEVPQFHylsn8F9kfY,317
66
71
  ocrd_network/cli/client.py,sha256=H5fiJhBqbFn4_B2p3V20GejGTIYO-mNglh3y5nzUGhs,10350
67
72
  ocrd_network/cli/processing_server.py,sha256=NsuI0f9h4KDwe39YugmHo5cJ_29chcLLQ7DThKfPO7s,770
68
73
  ocrd_network/cli/processing_worker.py,sha256=ZuaCkbKV_WKJV7cGOjZ6RLrjjppymnwNCiznFMlclAg,1897
74
+ ocrd_network/cli/resmgr_server.py,sha256=sc0VX_RehTbg8Qp7ht_DvVqsrdL5b9Zw3bBgWcAD13A,826
69
75
  ocrd_network/models/__init__.py,sha256=eVYMZaktzlyHKx-zI7GLYyRlZd3Vi_lNgsqSSFwqb6U,475
70
- ocrd_network/models/job.py,sha256=6NxcNIUHMS9Ft5UGWegngB6uNUOnE_6nOnRGKFbjI6A,4243
71
- ocrd_network/models/messages.py,sha256=XnyLMX77NchgmtKJRtqtBFsk_sCR4OGEuWm_d3uDkj8,657
76
+ ocrd_network/models/job.py,sha256=9bwp8DFoRH96WnRpkDV3XRfXCBiupzK6WXjqPsTcvLg,4440
77
+ ocrd_network/models/messages.py,sha256=OUDTjUiaATStsSAHCEDilUhBSruPsjpBtIBsllqN2Z0,672
72
78
  ocrd_network/models/workflow.py,sha256=GL8q7RX9fGdXG3iVyJpCeLXbWa-2qI_SIxqhzxs9VK8,189
73
- ocrd_network/models/workspace.py,sha256=7kjCTY0ixqcyJP9eHnptkrJnPTCD3zFNfhApZz9w6OU,1568
79
+ ocrd_network/models/workspace.py,sha256=rZcBWNlQOZX2KukP79IDRrXJvZ-H5pPH3WpPuw72HBM,1596
74
80
  ocrd_network/rabbitmq_utils/__init__.py,sha256=XLIqZhfin4I4m80G9B__UcP45Lz10_mEpMYLXGOByUk,741
75
81
  ocrd_network/rabbitmq_utils/connector.py,sha256=N6mzjIf5FkVIno3FI1AksZY4F5jMUAm8baay0nXZx8w,11343
76
82
  ocrd_network/rabbitmq_utils/constants.py,sha256=Zu_dKJASfrgnIvEZZlFX9uDR9y6w7zy0KhW7gP7wHDE,1063
@@ -80,20 +86,20 @@ ocrd_network/rabbitmq_utils/ocrd_messages.py,sha256=wwzfMWbXmOFo_nd32_XySCso91_U
80
86
  ocrd_network/rabbitmq_utils/publisher.py,sha256=mw4XQQhRE1xUQVgEUseyG845iIgVO-9GdGwNH6nUFms,2433
81
87
  ocrd_network/runtime_data/__init__.py,sha256=PnWuuagElbkTzGtPWQEk5wlFtDxqT7B48S0Zrgt8H68,320
82
88
  ocrd_network/runtime_data/config_parser.py,sha256=Vr0FbsqmsoiuhDgZ7KFdeFZj9JvUulcOS2PCRFQQNHY,2364
83
- ocrd_network/runtime_data/connection_clients.py,sha256=DZyAvkNyMaIddGJs56s2pMP_fK-XWAtICxk1cjvkWYM,4207
89
+ ocrd_network/runtime_data/connection_clients.py,sha256=HKf_aSfwg11JeH6qiQXnqxbnvNgCAMRnVIpj30k93Bc,4207
84
90
  ocrd_network/runtime_data/deployer.py,sha256=j3tcauURZtu7MKcEIE9B5eMCMSYMbxhB8LmtK72Zk1c,5314
85
- ocrd_network/runtime_data/hosts.py,sha256=n0azh_1XBc8-F9GtX-8q61iFzfxZjtjG-2D8qroihGA,7233
86
- ocrd_network/runtime_data/network_agents.py,sha256=UKcAO1lMZkXgmhMbltBnRo1j7QVSbfDTxA52YrNqP2Y,3891
91
+ ocrd_network/runtime_data/hosts.py,sha256=P1bLh1NjgL-ajgP-VhGCACvy6rgJ5nZhGKsHaldzatk,8921
92
+ ocrd_network/runtime_data/network_agents.py,sha256=wwN7IJei4UdlyfhjvdmB5TB4O0Gn8icSkArJe-suvAY,4523
87
93
  ocrd_network/runtime_data/network_services.py,sha256=5aH3RNGCi1fBuSdRp_Xz0MzyD_FmnvPnaBYAiYY3gp4,7891
88
- ocrd_utils/__init__.py,sha256=gcO26xJ6dIUtJIvAr8wOe3CM4b7Revn07-DwEureoEc,5973
94
+ ocrd_utils/__init__.py,sha256=Cl_lrZxjXuTZ_me4I_lpaFNTpSdacWhQetOtHdrkUsU,6057
89
95
  ocrd_utils/config.py,sha256=Oe8JBGb8r4z274XNWcdMV-GApzxmAYO8hHmbAV5bXf8,12609
90
- ocrd_utils/constants.py,sha256=ImbG1d8t2MW3uuFi-mN6aY90Zn74liAKZBKlfuKN86w,3278
96
+ ocrd_utils/constants.py,sha256=6lqMLeJdkFBlvGVmGjcExWbRKzNU6QT0kADBb5BkcBc,3464
91
97
  ocrd_utils/deprecate.py,sha256=luAqGWUSF-9DHmTd2lDiQoQPA5SrJazdoDPQYQ6A7Z4,1029
92
98
  ocrd_utils/image.py,sha256=tG5WnNtrrvGjm2-r6NVs1Jm7z8fee3MuLKotAD6C2RU,24818
93
99
  ocrd_utils/introspect.py,sha256=LPhgcUuoicQcURDCWlCpSdbfVyxID5vmQPXJ9vzuYV0,1977
94
- ocrd_utils/logging.py,sha256=Kj_z92pXbzWmc0jMJ299Pup9hfcnzJH8ltvI9w7STZc,7824
100
+ ocrd_utils/logging.py,sha256=-cCi_9kIzmLUixfnDcx2jq9IQuwMqrU-71RJhKOQilQ,7929
95
101
  ocrd_utils/ocrd_logging.conf,sha256=JlWmA_5vg6HnjPGjTC4mA5vFHqmnEinwllSTiOw5CCo,3473
96
- ocrd_utils/os.py,sha256=GstXB4i3kDBy7PXU-TaPYV4BI-lkqd_cYKl6uDkVMkw,9829
102
+ ocrd_utils/os.py,sha256=Fmy-Q4OMGlbHLMH9jhPRAxRN5curyEXrYoWJxztIl1w,13756
97
103
  ocrd_utils/str.py,sha256=4P0MdX0LCTqDTnsi_y5wNOBXW_TuTFANF7NYRXjo4x0,10136
98
104
  ocrd_validators/__init__.py,sha256=ZFc-UqRVBk9o1YesZFmr9lOepttNJ_NKx1Zdb7g_YsU,972
99
105
  ocrd_validators/bagit-profile.yml,sha256=sdQJlSi7TOn1E9WYMOZ1shewJ-i_nPaKmsAFkh28TGY,1011
@@ -103,7 +109,7 @@ ocrd_validators/message_processing.schema.yml,sha256=HL7o96-7ejslVMXcp16sbo5IjfU
103
109
  ocrd_validators/message_result.schema.yml,sha256=G6vt_JgIU7OGSaHj-2Jna6KWQ3bFWol5tnBArWEiVjM,681
104
110
  ocrd_validators/mets.xsd,sha256=0Wrs9bObn0n-yEEIWyguIcUUuuP6KMEjD4I_p1_UlwY,138290
105
111
  ocrd_validators/ocrd_network_message_validator.py,sha256=oafNWOjieBmTHFfYeCtyFFpW1gI0lDT6ycRr5Kvmfq0,561
106
- ocrd_validators/ocrd_tool.schema.yml,sha256=BQkRIRDbn9B8gFeVxz_EpNdleh_x2dCtIpJEC4HqFHw,10125
112
+ ocrd_validators/ocrd_tool.schema.yml,sha256=fDNr-QdEOBtYbz8aHmjdOUirPBKr3vfLUDtC88gu75U,10231
107
113
  ocrd_validators/ocrd_tool_validator.py,sha256=0DWuyyOSbdbrrQ5kEfWZv_qp5rSmLzmFMUKcPGfCBgM,749
108
114
  ocrd_validators/ocrd_zip_validator.py,sha256=t-cYIZ5llZSQ2EspFzm0m-FajkLRfAFTISmXe27wMtA,3720
109
115
  ocrd_validators/page.xsd,sha256=abQ8C3gRLPMFm8lH62aTCfvTIWI23TpgEDcaW9YCt7I,85770
@@ -117,9 +123,9 @@ ocrd_validators/xlink.xsd,sha256=8fW7YAMWXN2PbB_MMvj9H5ZeFoEBDzuYBtlGC8_6ijw,318
117
123
  ocrd_validators/xsd_mets_validator.py,sha256=YgiuNtwNDtn3LuvdFFscnmsGREF_wQ4wtA76yE2Iljw,469
118
124
  ocrd_validators/xsd_page_validator.py,sha256=ggt-nmaz-DDyAPwm3ZMVvtChuV2BJ2ZEEbWpePL9vTk,469
119
125
  ocrd_validators/xsd_validator.py,sha256=ahJo_oVvTK_JB0Cu4CkMC8l_gbzsyW91AxGtelMjqrg,2115
120
- ocrd-3.6.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
121
- ocrd-3.6.0.dist-info/METADATA,sha256=Qc1Nap_yo-Y3_8FYaVIvGXV79q4NKrCZs6h4BEKXIg4,10523
122
- ocrd-3.6.0.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
123
- ocrd-3.6.0.dist-info/entry_points.txt,sha256=4hcJ2LkK_OlIabHnKgFit35Ap7b5Lz1Gb4hzkxV0Kiw,152
124
- ocrd-3.6.0.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
125
- ocrd-3.6.0.dist-info/RECORD,,
126
+ ocrd-3.8.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
127
+ ocrd-3.8.0.dist-info/METADATA,sha256=ieqTzchmz7OXQV6vlize7XzWDzWEk5ypLXtn4VTduHY,11396
128
+ ocrd-3.8.0.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
129
+ ocrd-3.8.0.dist-info/entry_points.txt,sha256=CI-NoDR1BYmsuAsJmPAn4NrN9guzdedHGUbC8QSmdGs,266
130
+ ocrd-3.8.0.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
131
+ ocrd-3.8.0.dist-info/RECORD,,
@@ -1,4 +1,6 @@
1
1
  [console_scripts]
2
2
  ocrd = ocrd.cli:cli
3
+ ocrd-command = ocrd.processor.builtin.shell_processor:cli
3
4
  ocrd-dummy = ocrd.processor.builtin.dummy_processor:cli
4
5
  ocrd-filter = ocrd.processor.builtin.filter_processor:cli
6
+ ocrd-merge = ocrd.processor.builtin.merge_processor:cli
ocrd_models/ocrd_agent.py CHANGED
@@ -43,7 +43,7 @@ class OcrdAgent():
43
43
  self.otherrole = otherrole
44
44
  self.notes = notes
45
45
 
46
- def __str__(self):
46
+ def __repr__(self):
47
47
  """
48
48
  String representation
49
49
  """
@@ -142,7 +142,7 @@ class OcrdAgent():
142
142
  """
143
143
  el_notes = self._el.findall(TAG_METS_NOTE)
144
144
  if el_notes is not None:
145
- return [(note.attrib, note.text)
145
+ return [(dict(note.attrib), note.text)
146
146
  for note in el_notes]
147
147
 
148
148
  @notes.setter
@@ -190,7 +190,7 @@ class ClientSideOcrdAgent():
190
190
  self.otherrole = otherrole
191
191
  self.notes = notes
192
192
 
193
- def __str__(self):
193
+ def __repr__(self):
194
194
  props = ', '.join([
195
195
  '='.join([k, getattr(self, k) if getattr(self, k) else '---'])
196
196
  for k in ['type', 'othertype', 'role', 'otherrole', 'name']
ocrd_network/__init__.py CHANGED
@@ -3,4 +3,5 @@ from .constants import JobState
3
3
  from .processing_server import ProcessingServer
4
4
  from .processing_worker import ProcessingWorker
5
5
  from .param_validators import DatabaseParamType, ServerAddressParamType, QueueServerParamType
6
+ from .resource_manager_server import ResourceManagerServer
6
7
  from .server_cache import CacheLockedPages, CacheProcessingRequests
@@ -1,9 +1,11 @@
1
1
  from .client import client_cli
2
2
  from .processing_server import processing_server_cli
3
3
  from .processing_worker import processing_worker_cli
4
+ from .resmgr_server import resource_manager_server_cli
4
5
 
5
6
  __all__ = [
6
7
  'client_cli',
7
8
  'processing_server_cli',
8
9
  'processing_worker_cli',
10
+ 'resource_manager_server_cli'
9
11
  ]
@@ -0,0 +1,23 @@
1
+ import click
2
+ from ocrd_network import ResourceManagerServer, ServerAddressParamType
3
+
4
+
5
+ @click.command('resmgr-server')
6
+ @click.option('-a', '--address',
7
+ help='The URL of the OCR-D resource manager server, format: host:port',
8
+ type=ServerAddressParamType(),
9
+ required=True)
10
+ def resource_manager_server_cli(address: str):
11
+ """
12
+ Start standalone REST API OCR-D Resource Manager Server
13
+ """
14
+ try:
15
+ # Note, the address is already validated with the type field
16
+ host, port = address.split(':')
17
+ resource_manager_server = ResourceManagerServer(
18
+ host = host,
19
+ port = int(port)
20
+ )
21
+ resource_manager_server.start()
22
+ except Exception as e:
23
+ raise Exception("OCR-D Resource Manager Server has failed with error") from e
ocrd_network/constants.py CHANGED
@@ -10,6 +10,8 @@ OCRD_ALL_TOOL_JSON = "ocrd-all-tool.json"
10
10
  # Used as a placeholder to lock all pages when no page_id is specified
11
11
  SERVER_ALL_PAGES_PLACEHOLDER = "all_pages"
12
12
 
13
+ # TODO: Make this more configurable
14
+ RESOURCE_MANAGER_SERVER_PORT = 45555
13
15
 
14
16
  class StrEnum(str, Enum):
15
17
  def __str__(self):
@@ -48,6 +50,7 @@ class NetworkLoggingDirs(StrEnum):
48
50
  PROCESSING_JOBS = "processing_jobs"
49
51
  PROCESSING_SERVERS = "processing_servers"
50
52
  PROCESSING_WORKERS = "processing_workers"
53
+ RESOURCE_MANAGER_SERVERS = "resource_manager_servers"
51
54
 
52
55
 
53
56
  class ServerApiTags(StrEnum):
@@ -56,3 +56,8 @@ def get_processing_server_logging_file_path(pid: int) -> Path:
56
56
  def get_processing_worker_logging_file_path(processor_name: str, pid: int) -> Path:
57
57
  log_file: str = f"worker.{pid}.{processor_name}.log"
58
58
  return Path(get_root_logging_dir(NetworkLoggingDirs.PROCESSING_WORKERS), log_file)
59
+
60
+
61
+ def get_resource_manager_server_logging_file_path(pid: int) -> Path:
62
+ log_file: str = f"resource_manager_server.{pid}.log"
63
+ return Path(get_root_logging_dir(NetworkLoggingDirs.RESOURCE_MANAGER_SERVERS), log_file)
@@ -13,7 +13,7 @@ class PYJobInput(BaseModel):
13
13
  workspace_id: Optional[str] = None
14
14
  description: Optional[str] = None
15
15
  input_file_grps: List[str]
16
- output_file_grps: Optional[List[str]]
16
+ output_file_grps: Optional[List[str]] = None
17
17
  page_id: Optional[str] = None
18
18
  parameters: dict = {} # Always set to empty dict when None, otherwise it fails ocr-d-validation
19
19
  result_queue_name: Optional[str] = None
@@ -23,8 +23,8 @@ class PYJobInput(BaseModel):
23
23
  # If set, specifies a list of job ids this job depends on
24
24
  depends_on: Optional[List[str]] = None
25
25
 
26
- class Config:
27
- schema_extra = {
26
+ model_config = {
27
+ 'json_schema_extra': {
28
28
  'example': {
29
29
  'path_to_mets': '/path/to/mets.xml',
30
30
  'description': 'The description of this execution',
@@ -34,6 +34,7 @@ class PYJobInput(BaseModel):
34
34
  'parameters': {}
35
35
  }
36
36
  }
37
+ }
37
38
 
38
39
 
39
40
  class PYJobOutput(BaseModel):
@@ -42,12 +43,12 @@ class PYJobOutput(BaseModel):
42
43
  job_id: str
43
44
  processor_name: str
44
45
  state: JobState = JobState.unset
45
- path_to_mets: Optional[str]
46
- workspace_id: Optional[str]
46
+ path_to_mets: Optional[str] = None
47
+ workspace_id: Optional[str] = None
47
48
  input_file_grps: List[str]
48
- output_file_grps: Optional[List[str]]
49
+ output_file_grps: Optional[List[str]] = None
49
50
  page_id: Optional[str] = None
50
- log_file_path: Optional[str]
51
+ log_file_path: Optional[str] = None
51
52
 
52
53
 
53
54
  class DBProcessorJob(Document):
@@ -55,22 +56,22 @@ class DBProcessorJob(Document):
55
56
  """
56
57
  job_id: str
57
58
  processor_name: str
58
- path_to_mets: Optional[str]
59
- workspace_id: Optional[str]
60
- description: Optional[str]
59
+ path_to_mets: Optional[str] = None
60
+ workspace_id: Optional[str] = None
61
+ description: Optional[str] = None
61
62
  state: JobState = JobState.unset
62
63
  input_file_grps: List[str]
63
- output_file_grps: Optional[List[str]]
64
- page_id: Optional[str]
65
- parameters: Optional[dict]
66
- depends_on: Optional[List[str]]
67
- result_queue_name: Optional[str]
68
- callback_url: Optional[str]
69
- internal_callback_url: Optional[str]
70
- start_time: Optional[datetime]
71
- end_time: Optional[datetime]
72
- exec_time: Optional[str]
73
- log_file_path: Optional[str]
64
+ output_file_grps: Optional[List[str]] = None
65
+ page_id: Optional[str] = None
66
+ parameters: Optional[dict] = None
67
+ depends_on: Optional[List[str]] = None
68
+ result_queue_name: Optional[str] = None
69
+ callback_url: Optional[str] = None
70
+ internal_callback_url: Optional[str] = None
71
+ start_time: Optional[datetime] = None
72
+ end_time: Optional[datetime] = None
73
+ exec_time: Optional[str] = None
74
+ log_file_path: Optional[str] = None
74
75
 
75
76
  class Settings:
76
77
  use_enum_values = True
@@ -99,9 +100,9 @@ class PYWorkflowJobOutput(BaseModel):
99
100
  page_id: str
100
101
  page_wise: bool = False
101
102
  job_id: str
102
- path_to_mets: Optional[str]
103
- workspace_id: Optional[str]
104
- description: Optional[str]
103
+ path_to_mets: Optional[str] = None
104
+ workspace_id: Optional[str] = None
105
+ description: Optional[str] = None
105
106
 
106
107
 
107
108
  class DBWorkflowJob(Document):
@@ -114,10 +115,10 @@ class DBWorkflowJob(Document):
114
115
  # key: page_id
115
116
  # value: List of and processing job ids sorted in dependency order
116
117
  processing_job_ids: Dict
117
- path_to_mets: Optional[str]
118
- workspace_id: Optional[str]
119
- description: Optional[str]
120
- workflow_callback_url: Optional[str]
118
+ path_to_mets: Optional[str] = None
119
+ workspace_id: Optional[str] = None
120
+ description: Optional[str] = None
121
+ workflow_callback_url: Optional[str] = None
121
122
 
122
123
  class Settings:
123
124
  use_enum_values = True
@@ -11,8 +11,8 @@ class PYResultMessage(BaseModel):
11
11
  path_to_mets: Optional[str] = None
12
12
  workspace_id: Optional[str] = None
13
13
 
14
- class Config:
15
- schema_extra = {
14
+ model_config = {
15
+ "json_schema_extra": {
16
16
  "example": {
17
17
  "job_id": "d8e36726-ed28-5476-b83c-bc31d2eecf1f",
18
18
  "state": JobState.success,
@@ -20,3 +20,4 @@ class PYResultMessage(BaseModel):
20
20
  "workspace_id": "c7f25615-fc17-4365-a74d-ad20e1ddbd0e"
21
21
  }
22
22
  }
23
+ }
@@ -25,10 +25,10 @@ class DBWorkspace(Document):
25
25
  workspace_mets_path: str
26
26
  ocrd_identifier: str
27
27
  bagit_profile_identifier: str
28
- ocrd_base_version_checksum: Optional[str]
29
- ocrd_mets: Optional[str]
30
- bag_info_adds: Optional[dict]
31
- mets_server_url: Optional[str]
28
+ ocrd_base_version_checksum: Optional[str] = None
29
+ ocrd_mets: Optional[str] = None
30
+ bag_info_adds: Optional[dict] = None
31
+ mets_server_url: Optional[str] = None
32
32
  deleted: bool = False
33
33
 
34
34
  class Settings: