ocrd 3.5.0__py3-none-any.whl → 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. ocrd/cli/__init__.py +6 -2
  2. ocrd/cli/bashlib.py +7 -2
  3. ocrd/cli/log.py +7 -2
  4. ocrd/cli/network.py +0 -2
  5. ocrd/cli/ocrd_tool.py +26 -4
  6. ocrd/cli/process.py +1 -0
  7. ocrd/cli/resmgr.py +0 -1
  8. ocrd/cli/validate.py +32 -13
  9. ocrd/cli/workspace.py +125 -52
  10. ocrd/cli/zip.py +13 -4
  11. ocrd/decorators/__init__.py +28 -52
  12. ocrd/decorators/loglevel_option.py +4 -0
  13. ocrd/decorators/mets_find_options.py +2 -1
  14. ocrd/decorators/ocrd_cli_options.py +3 -7
  15. ocrd/decorators/parameter_option.py +12 -11
  16. ocrd/lib.bash +6 -13
  17. ocrd/mets_server.py +6 -10
  18. ocrd/processor/base.py +88 -71
  19. ocrd/processor/builtin/dummy_processor.py +7 -4
  20. ocrd/processor/builtin/filter_processor.py +3 -2
  21. ocrd/processor/helpers.py +5 -6
  22. ocrd/processor/ocrd_page_result.py +7 -5
  23. ocrd/resolver.py +42 -32
  24. ocrd/task_sequence.py +11 -4
  25. ocrd/workspace.py +64 -54
  26. ocrd/workspace_backup.py +3 -0
  27. ocrd/workspace_bagger.py +15 -8
  28. {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/METADATA +3 -2
  29. ocrd-3.6.0.dist-info/RECORD +125 -0
  30. ocrd_modelfactory/__init__.py +4 -2
  31. ocrd_models/constants.py +18 -1
  32. ocrd_models/ocrd_agent.py +1 -1
  33. ocrd_models/ocrd_exif.py +7 -3
  34. ocrd_models/ocrd_file.py +24 -19
  35. ocrd_models/ocrd_mets.py +90 -67
  36. ocrd_models/ocrd_page.py +17 -13
  37. ocrd_models/ocrd_xml_base.py +1 -0
  38. ocrd_models/report.py +2 -1
  39. ocrd_models/utils.py +4 -3
  40. ocrd_models/xpath_functions.py +3 -1
  41. ocrd_network/__init__.py +1 -2
  42. ocrd_network/cli/__init__.py +0 -2
  43. ocrd_network/cli/client.py +122 -50
  44. ocrd_network/cli/processing_server.py +1 -2
  45. ocrd_network/client.py +2 -2
  46. ocrd_network/client_utils.py +30 -13
  47. ocrd_network/constants.py +1 -6
  48. ocrd_network/database.py +3 -3
  49. ocrd_network/logging_utils.py +2 -7
  50. ocrd_network/models/__init__.py +0 -2
  51. ocrd_network/models/job.py +2 -5
  52. ocrd_network/models/workspace.py +1 -1
  53. ocrd_network/process_helpers.py +54 -17
  54. ocrd_network/processing_server.py +63 -114
  55. ocrd_network/processing_worker.py +6 -5
  56. ocrd_network/rabbitmq_utils/__init__.py +2 -0
  57. ocrd_network/rabbitmq_utils/helpers.py +24 -7
  58. ocrd_network/runtime_data/__init__.py +1 -2
  59. ocrd_network/runtime_data/deployer.py +12 -85
  60. ocrd_network/runtime_data/hosts.py +61 -130
  61. ocrd_network/runtime_data/network_agents.py +7 -31
  62. ocrd_network/runtime_data/network_services.py +1 -1
  63. ocrd_network/server_cache.py +1 -1
  64. ocrd_network/server_utils.py +13 -52
  65. ocrd_network/utils.py +1 -0
  66. ocrd_utils/__init__.py +4 -4
  67. ocrd_utils/config.py +86 -76
  68. ocrd_utils/deprecate.py +3 -0
  69. ocrd_utils/image.py +51 -23
  70. ocrd_utils/introspect.py +8 -3
  71. ocrd_utils/logging.py +12 -7
  72. ocrd_utils/os.py +16 -3
  73. ocrd_utils/str.py +32 -16
  74. ocrd_validators/json_validator.py +4 -1
  75. ocrd_validators/ocrd_tool_validator.py +2 -1
  76. ocrd_validators/ocrd_zip_validator.py +5 -4
  77. ocrd_validators/page_validator.py +21 -9
  78. ocrd_validators/parameter_validator.py +3 -2
  79. ocrd_validators/processing_server_config.schema.yml +1 -33
  80. ocrd_validators/resource_list_validator.py +3 -1
  81. ocrd_validators/workspace_validator.py +30 -20
  82. ocrd_validators/xsd_mets_validator.py +2 -1
  83. ocrd_validators/xsd_page_validator.py +2 -1
  84. ocrd_validators/xsd_validator.py +4 -2
  85. ocrd-3.5.0.dist-info/RECORD +0 -128
  86. ocrd_network/cli/processor_server.py +0 -31
  87. ocrd_network/models/ocrd_tool.py +0 -12
  88. ocrd_network/processor_server.py +0 -255
  89. {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/LICENSE +0 -0
  90. {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/WHEEL +0 -0
  91. {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/entry_points.txt +0 -0
  92. {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/top_level.txt +0 -0
@@ -9,6 +9,7 @@ from ocrd_models import ValidationReport
9
9
 
10
10
  from .constants import XSD_PATHS
11
11
 
12
+
12
13
  #
13
14
  # -------------------------------------------------
14
15
  #
@@ -35,7 +36,7 @@ class XsdValidator():
35
36
  doc (etree.ElementTree|str|bytes):
36
37
  schema_url (str): URI of XML schema to validate against.
37
38
  """
38
- return cls.instance(schema_url)._validate(doc) # pylint: disable=protected-access
39
+ return cls.instance(schema_url)._validate(doc) # pylint: disable=protected-access
39
40
 
40
41
  def __init__(self, schema_url):
41
42
  """
@@ -55,7 +56,8 @@ class XsdValidator():
55
56
  Do the actual validation.
56
57
 
57
58
  Arguments:
58
- doc (etree.ElementTree|str|bytes|pathlib.Path): the document. if etree: us as-is. if str/bytes: parse as XML string. If Path: read_text on it
59
+ doc (etree.ElementTree|str|bytes|pathlib.Path): the document.
60
+ (If etree: us as-is. If str/bytes: parse as XML string. If Path: read_text on it.)
59
61
 
60
62
  Returns: ValidationReport
61
63
  """
@@ -1,128 +0,0 @@
1
- ocrd/__init__.py,sha256=ZswMVmlqFhAEIzMR3my6IKPq9XLH21aDPC_m_8Jh4dA,1076
2
- ocrd/constants.py,sha256=6dn3mG54WqHsKInmLZp4kJjNqqPtBoFoSuLUuRbOps0,740
3
- ocrd/lib.bash,sha256=le6XqAOEacdjP3JNSlPkxwRH1y0oVjNQM2tX5d6QFO4,10901
4
- ocrd/mets_server.py,sha256=o01N5vAdPeu-xCgngcMRQm0bzBOhn_IFMV0AbeXDu9g,22491
5
- ocrd/ocrd-all-tool.json,sha256=EYXmMzP68p3KzL8nUZ16TCX2chQzKkAeISvuXqI_yIw,2094
6
- ocrd/resolver.py,sha256=A7BrZlUGrfJye-etaEuT-fdJFgvQcCxWovjufT-WmRY,15119
7
- ocrd/resource_list.yml,sha256=82-PiqkZnka1kTj3MQqNn4wXWKHHtoFchsQuetWuqFs,2633
8
- ocrd/resource_manager.py,sha256=kIWDoKxWH4IJE1gcoTcCRQjYjieCqiQclyuyF6Y9b8A,16813
9
- ocrd/task_sequence.py,sha256=spiaUQaMM7M8WdBDoQGmLuTPm7tOugYXD6rcJ2UXzxw,6991
10
- ocrd/workspace.py,sha256=eLuGSJtOh3y2miKgcF8219YH1RkAaEi-qwXHarz8O8k,64916
11
- ocrd/workspace_backup.py,sha256=iab_JjZ_mMP-G8NIUk4PZmfpNlQuGRoqc3NbTSSew1w,3621
12
- ocrd/workspace_bagger.py,sha256=yU8H3xR5WmQKvgQewac71ie-DUWcfLnMS01D55zsEHQ,11971
13
- ocrd/cli/__init__.py,sha256=LpQb8ne1nzAq2j52lGWDTZlBCmrLwUsz17PTwJkWNcU,2884
14
- ocrd/cli/bashlib.py,sha256=ypFBM3-IULz_IEBx0Y04eGt9VbQWwEWm4ujm9g_hPWY,6009
15
- ocrd/cli/log.py,sha256=6_FrVmTKIIVNUaNLkuOJx8pvPhensHMuayJ0PA7T-XA,1562
16
- ocrd/cli/network.py,sha256=oWBHFEURxfUdb_t-F4svP_ri7o5mqBoNQnLZLbsZLTA,602
17
- ocrd/cli/ocrd_tool.py,sha256=MLTqbtBCw8wBSScv2S4_xp5Jyz5fwD7BzUJxI9R4TBw,7653
18
- ocrd/cli/process.py,sha256=8KD0i7LT01H9u5CC1vktYMEVpS67da_rp_09_EOECmw,1233
19
- ocrd/cli/resmgr.py,sha256=mk8KZweC_7ENAFnC6FvFf7Zv_W1wqJTmk0EMd9XSvf4,10132
20
- ocrd/cli/validate.py,sha256=nvageDaHCETcE71X5lu7i_4JKpgo9MrvJKinVPLYUTI,5727
21
- ocrd/cli/workspace.py,sha256=t40r3tnzz0VivhmT0HSeA5-2xdvUleRYvniqLIBGoWs,40501
22
- ocrd/cli/zip.py,sha256=MMJLw3OXWiJVfVtrdJcBkbB8vA1IzSautluazZRuCQ0,5910
23
- ocrd/decorators/__init__.py,sha256=n2Lb1WLXGlvPrhNTSGZYRqugpa__MZSWV546EmQnTtc,7678
24
- ocrd/decorators/loglevel_option.py,sha256=tgipROEu3t4hkwWvFssd80k2SbTBwBIC4WNE6Gc-XAg,798
25
- ocrd/decorators/mets_find_options.py,sha256=d4oATKMP6bFQHNqOK6nLqgUiWF2FYdkPvzkTVRMYpKo,635
26
- ocrd/decorators/ocrd_cli_options.py,sha256=psS7u42mXTOWIXQd9kcrgW7kDnFURHbmZ0946aqBz3A,2659
27
- ocrd/decorators/parameter_option.py,sha256=n8hYw7XVTd3i3tvpK8F1Jx_CqRp6EGF9qJVH95yj92Q,1076
28
- ocrd/processor/__init__.py,sha256=39ymNwYRdc-b_OJzzKmWCvo2ga3KdsGSYDHE1Hzkn_w,274
29
- ocrd/processor/base.py,sha256=_h0V5FevEPLb1q0zGtShuKXRj_tOWhD0M7_ufn34MPc,60476
30
- ocrd/processor/helpers.py,sha256=WFdC5zeB8F7T0FkpJwfTqWsSPNRtBCBUmFLgixw-rYs,10999
31
- ocrd/processor/ocrd_page_result.py,sha256=eDkpyVHcpaBzTHXiGrcNk9PP9Xr-XZru2w_uoX_ZeNA,510
32
- ocrd/processor/builtin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
- ocrd/processor/builtin/dummy_processor.py,sha256=a-4kKJ1JeXQuBIyyN8w2R3s7ov-wAfyEdEz3nxrf0sU,3479
34
- ocrd/processor/builtin/filter_processor.py,sha256=nDnXjo2tDCjodURb8VlB4VJtRwWGx261jH7AG91edSk,4317
35
- ocrd/processor/builtin/dummy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
- ocrd/processor/builtin/dummy/ocrd-tool.json,sha256=NgMAXN1AQpGk4Ss73ThDY4QyFPKhj54qcrdeCGwTb10,2339
37
- ocrd_modelfactory/__init__.py,sha256=NyJT1uSvmeEwibRFOkh0AEoVnYfP0mzxU--pP23B-TQ,4404
38
- ocrd_models/__init__.py,sha256=A0aj0mOraNb-xfiUueACdoaqISnp0qH-F49nTJg2vCs,380
39
- ocrd_models/constants.py,sha256=z5XAFMgz3pttMJOHVzTWNZr3ZqMjonVIDmXk3GQTJ30,6954
40
- ocrd_models/mets-empty.xml,sha256=dFixfbxSXrgjZx9BfdIKWHX-khNmp7dNYaFe2qQSwCY,1203
41
- ocrd_models/ocrd_agent.py,sha256=E9OtDhz9UfKb6ou2qvsuCL9NlO1V6zMb0s8nVq8dVos,5609
42
- ocrd_models/ocrd_exif.py,sha256=wRSprHxCy9LCXw41Fi9kp-CbFc5NFX9ZFIFNszB41qk,4585
43
- ocrd_models/ocrd_file.py,sha256=7lyHezuNnl2FEYV1lV35-QTCrgYAL-3wO2ulFUNq2Ak,9717
44
- ocrd_models/ocrd_mets.py,sha256=FHZnztf1cfWim_sAtTVFXt2ZuQx2HVDTQ1xIobIVIeQ,50540
45
- ocrd_models/ocrd_page.py,sha256=TTCnvpKGyZx1dqH8LnDiVVVPjU6emWGVLO_4o9rQHtw,6233
46
- ocrd_models/ocrd_page_generateds.py,sha256=IWoN3V-v3C4JgyPaFh9OQC87ob__wUP1Q6ELBxhLA1w,841794
47
- ocrd_models/ocrd_xml_base.py,sha256=OW57mXLlwm1nH8CNefvXmwLRws9KL9zSrb-3vH--mX8,1641
48
- ocrd_models/report.py,sha256=luZxvzAAQyGYOlRNSJQUIUIANG81iGmBW5ag-uXxKCA,2026
49
- ocrd_models/utils.py,sha256=0_WHf5NEn1WC8MKJc6X_RK8gW-70Z09_mslkKOj7uF8,2369
50
- ocrd_models/xpath_functions.py,sha256=AwR8tHf56-mmIksnw_GeOQ760sxNHqK92T7z9OfsEEs,1437
51
- ocrd_network/__init__.py,sha256=gMejC614J5PPGgXDKBiQS0jt-Jx8qOrLbWH7zt8x8Gs,374
52
- ocrd_network/client.py,sha256=rzqtamZ8krRRy-QTO-AeWH8Lr3HhRiQe2R1-Lovd40g,3020
53
- ocrd_network/client_utils.py,sha256=VVZMNBgGznh41exZ78S48X3DDwHcWTuOq-LNdxjRvak,5002
54
- ocrd_network/constants.py,sha256=AAcE6zZQNcNp2oqPD6oIgoVLSs4IHTkg8AS92WCQ6Xo,1968
55
- ocrd_network/database.py,sha256=fcft7vdRDoR7vmPL1xNYTIeOg5DwRPcggwYDYxLy5ik,10706
56
- ocrd_network/logging_utils.py,sha256=ijWpM8B943Jx6F0NeK3ggni0198UYjM5NCkYpARLk_E,2472
57
- ocrd_network/param_validators.py,sha256=Jl1VwiPPKJ50k-xEHLdvW-1QDOkJHCiMz4k9Ipqm-Uc,1489
58
- ocrd_network/process_helpers.py,sha256=KpkInXsa5bgrxvTOouyMJ0NgJhaz0J9Gjs5sZHBcH64,2373
59
- ocrd_network/processing_server.py,sha256=qBiYk4wgTLqhHvbmDWu_F626BfSfyvkoCD-i0ZwsBSE,42109
60
- ocrd_network/processing_worker.py,sha256=fhIvmDQAYOkHYtUs5IB8Jk2lOKUTIBk3DskAsFloijA,12591
61
- ocrd_network/processor_server.py,sha256=2CD9TlinXk6x1jFjP5VWOXgJe8lAQdxc9zjZuVy3EOw,9931
62
- ocrd_network/server_cache.py,sha256=LpvJ-_Lbaeo4M3t8rZDdm9DAErZr8lDlma6pYc0m7aQ,13149
63
- ocrd_network/server_utils.py,sha256=Uge5F2VagPAEpcyU_Qf8AiecObIGXE0ilD8DaK7bTdE,12222
64
- ocrd_network/tcp_to_uds_mets_proxy.py,sha256=yRW-O6ihd31gf7xqQBIBb_ZQQgqisMyOdRI216ehq_A,3160
65
- ocrd_network/utils.py,sha256=XzPXeSPCVjWLQM540PCpxfJ5hqjJ85_OQBjnf9HlDtE,6759
66
- ocrd_network/cli/__init__.py,sha256=F7YVqxw-9glz6-ghG0Kp5XXeV1-rL1emVSXLCWxdTF0,306
67
- ocrd_network/cli/client.py,sha256=gFEXjz-d074CpvimqaM4kJRbJVNYRAOK-jsUl2EAUVs,8424
68
- ocrd_network/cli/processing_server.py,sha256=rAci6RsHlZ0c87GuLdfdCQCiGNcDEu4NEEQiwKJqVUo,796
69
- ocrd_network/cli/processing_worker.py,sha256=ZuaCkbKV_WKJV7cGOjZ6RLrjjppymnwNCiznFMlclAg,1897
70
- ocrd_network/cli/processor_server.py,sha256=Vto7UekFo_g83aHqwDmhge9bhPzk0b7O-L46dSfIpJc,1259
71
- ocrd_network/models/__init__.py,sha256=AcpZrenygOudMi47Wfr1UCrpbghq2gP8aMAKodgEIFM,527
72
- ocrd_network/models/job.py,sha256=2-E1cKfdTC3Y5AUemCLz1a_t7BlT8BNF6teAC0f8J5o,4442
73
- ocrd_network/models/messages.py,sha256=XnyLMX77NchgmtKJRtqtBFsk_sCR4OGEuWm_d3uDkj8,657
74
- ocrd_network/models/ocrd_tool.py,sha256=WhxSwDyEXtF03Cu8u2tLZcYM0tCacL4PX1GveAxnWEc,293
75
- ocrd_network/models/workflow.py,sha256=GL8q7RX9fGdXG3iVyJpCeLXbWa-2qI_SIxqhzxs9VK8,189
76
- ocrd_network/models/workspace.py,sha256=42G8RoS0wJ902LHyM09NIaqHoRdmP-Oib3d5I07sTBQ,1579
77
- ocrd_network/rabbitmq_utils/__init__.py,sha256=8MRawAiSpZ9IQUBcLAS7sYOzNC9sI11eZOLk7k92_ZQ,691
78
- ocrd_network/rabbitmq_utils/connector.py,sha256=N6mzjIf5FkVIno3FI1AksZY4F5jMUAm8baay0nXZx8w,11343
79
- ocrd_network/rabbitmq_utils/constants.py,sha256=Zu_dKJASfrgnIvEZZlFX9uDR9y6w7zy0KhW7gP7wHDE,1063
80
- ocrd_network/rabbitmq_utils/consumer.py,sha256=3WeryDmo0dSD9U0eLODbDElscvhEYjNeCBIewQHYfws,2488
81
- ocrd_network/rabbitmq_utils/helpers.py,sha256=y8FTC1ml_IBNcFo14GgCNtNRxYDotQn7U14HmTkv6h0,4874
82
- ocrd_network/rabbitmq_utils/ocrd_messages.py,sha256=wwzfMWbXmOFo_nd32_XySCso91_Ul-aGm_GhGncNxD4,4419
83
- ocrd_network/rabbitmq_utils/publisher.py,sha256=mw4XQQhRE1xUQVgEUseyG845iIgVO-9GdGwNH6nUFms,2433
84
- ocrd_network/runtime_data/__init__.py,sha256=3jYkmT4mxMUcpbDaSw7Ld0KTedGEx_5vUQPDjwUyJZc,367
85
- ocrd_network/runtime_data/config_parser.py,sha256=Vr0FbsqmsoiuhDgZ7KFdeFZj9JvUulcOS2PCRFQQNHY,2364
86
- ocrd_network/runtime_data/connection_clients.py,sha256=DZyAvkNyMaIddGJs56s2pMP_fK-XWAtICxk1cjvkWYM,4207
87
- ocrd_network/runtime_data/deployer.py,sha256=LkDUG0uJf_V4SteiOM3EWwhKtdANCjmAOEAJJDshN30,9111
88
- ocrd_network/runtime_data/hosts.py,sha256=ml19ptzH4TFofyJR-Qp_Mn3sZUFbWoNe__rRXZSj_WE,12185
89
- ocrd_network/runtime_data/network_agents.py,sha256=5p_zKLqECBIHLw-Ya6eKcKSZcUM4ESiipEIphVxHBEA,5192
90
- ocrd_network/runtime_data/network_services.py,sha256=xrPpFUU_Pa-XzGe2FEt5RmO17xqykIUmTr_9g6S7XSs,7892
91
- ocrd_utils/__init__.py,sha256=U_zAQJwxg_aJ4CR84CKMNAUP6Cob8Er8Ikj42JmnUKo,5977
92
- ocrd_utils/config.py,sha256=PGRnqrDT7lWJCd9ezoAEBJ5CyVJfFnvCEPTLjtKduOQ,12143
93
- ocrd_utils/constants.py,sha256=ImbG1d8t2MW3uuFi-mN6aY90Zn74liAKZBKlfuKN86w,3278
94
- ocrd_utils/deprecate.py,sha256=4i50sZsA3Eevqn5D-SL5yGf9KEZfGCV4A5Anzn1GRMs,1026
95
- ocrd_utils/image.py,sha256=zNNX1cnRy6yvrxx8mnYQiqWraAh5-i4a1AOfCCg4SmI,24781
96
- ocrd_utils/introspect.py,sha256=gfBlmeEFuRmRUSgdSK0jOxRpYqDRXl2IAE6gv2MZ6as,1977
97
- ocrd_utils/logging.py,sha256=XYTL7DxUvdX4V56jhAYH6PkhjMFOmaa0kf_XkhSTTe0,7816
98
- ocrd_utils/ocrd_logging.conf,sha256=JlWmA_5vg6HnjPGjTC4mA5vFHqmnEinwllSTiOw5CCo,3473
99
- ocrd_utils/os.py,sha256=tMjikpVXJ8sCgYBOrgjgT3vlR2Pok39nSKysYc6mUQ4,9863
100
- ocrd_utils/str.py,sha256=cRgqYILDGOAqWr0qrCrV52I3y4wvpwDVtnBGEUjXNS4,10116
101
- ocrd_validators/__init__.py,sha256=ZFc-UqRVBk9o1YesZFmr9lOepttNJ_NKx1Zdb7g_YsU,972
102
- ocrd_validators/bagit-profile.yml,sha256=sdQJlSi7TOn1E9WYMOZ1shewJ-i_nPaKmsAFkh28TGY,1011
103
- ocrd_validators/constants.py,sha256=FLP57T3F39weka_XovG40RgVMW1GunnbK04QRQ9tmlE,1802
104
- ocrd_validators/json_validator.py,sha256=jkVYwBtXwoubcNbLFWy8kC-DRqdf-9ryoqEf5HHi3Ds,2651
105
- ocrd_validators/message_processing.schema.yml,sha256=HL7o96-7ejslVMXcp16sbo5IjfUcW0wH8rPXHXdTyyA,1947
106
- ocrd_validators/message_result.schema.yml,sha256=G6vt_JgIU7OGSaHj-2Jna6KWQ3bFWol5tnBArWEiVjM,681
107
- ocrd_validators/mets.xsd,sha256=0Wrs9bObn0n-yEEIWyguIcUUuuP6KMEjD4I_p1_UlwY,138290
108
- ocrd_validators/ocrd_network_message_validator.py,sha256=oafNWOjieBmTHFfYeCtyFFpW1gI0lDT6ycRr5Kvmfq0,561
109
- ocrd_validators/ocrd_tool.schema.yml,sha256=BQkRIRDbn9B8gFeVxz_EpNdleh_x2dCtIpJEC4HqFHw,10125
110
- ocrd_validators/ocrd_tool_validator.py,sha256=CvfUHC0vJsPnmFBNf4ndQMXX6pcERCjJMAWk2L-T_ww,747
111
- ocrd_validators/ocrd_zip_validator.py,sha256=udAImFFliJY3y1FcKwZ52rhpjYB2Iv491hWDxdet8w0,3685
112
- ocrd_validators/page.xsd,sha256=abQ8C3gRLPMFm8lH62aTCfvTIWI23TpgEDcaW9YCt7I,85770
113
- ocrd_validators/page_validator.py,sha256=0fI278TLTsrw6htcwk7d9WkTFcbHNip87VMS2uw4Bzc,21765
114
- ocrd_validators/parameter_validator.py,sha256=_5Y3IS24Sf_xHBkB3TE3jB9VTCbbjWO8bSwcQDP0ewI,1365
115
- ocrd_validators/processing_server_config.schema.yml,sha256=8NQbhSshm1exTvbdYiu694rZZ-Xe70_vQtsJ0nd7ZCM,5432
116
- ocrd_validators/processing_server_config_validator.py,sha256=lQ2-ZxsvbFki_SvE_N4_1ptBnBHcwOTJ5grtL2G9F8A,810
117
- ocrd_validators/resource_list_validator.py,sha256=cFMj0n_x-tjhuNUpjgEvPP8iPVm7lme9TWAaqATasV0,776
118
- ocrd_validators/workspace_validator.py,sha256=JNPsRVPgQI0vsaxcs_c3qj22GagdZcgO3v9u3sbBbBI,20340
119
- ocrd_validators/xlink.xsd,sha256=8fW7YAMWXN2PbB_MMvj9H5ZeFoEBDzuYBtlGC8_6ijw,3180
120
- ocrd_validators/xsd_mets_validator.py,sha256=4GWfLyqkmca0x7osDuXuExYuM0HWVrKoqn0S35sFhHU,467
121
- ocrd_validators/xsd_page_validator.py,sha256=BNz_9u-Ek4UCeyZu3KxSQoolfW9lvuaSR9nIu1XXxeE,467
122
- ocrd_validators/xsd_validator.py,sha256=6HrVAf6SzCvfUIuQdIzz9bOq4V-zhyii9yrUPoK2Uvo,2094
123
- ocrd-3.5.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
124
- ocrd-3.5.0.dist-info/METADATA,sha256=t6X-RzqcEpiZDXRaRR0kSfU6hgu4fAa5qLpGVD4Lhjs,10442
125
- ocrd-3.5.0.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
126
- ocrd-3.5.0.dist-info/entry_points.txt,sha256=4hcJ2LkK_OlIabHnKgFit35Ap7b5Lz1Gb4hzkxV0Kiw,152
127
- ocrd-3.5.0.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
128
- ocrd-3.5.0.dist-info/RECORD,,
@@ -1,31 +0,0 @@
1
- import click
2
- from ocrd_network import DatabaseParamType, ProcessorServer, ServerAddressParamType
3
-
4
-
5
- @click.command('processor-server')
6
- @click.argument('processor_name', required=True, type=click.STRING)
7
- @click.option('-a', '--address',
8
- help='The URL of the processor server, format: host:port',
9
- type=ServerAddressParamType(),
10
- required=True)
11
- @click.option('-d', '--database',
12
- default="mongodb://localhost:27018",
13
- help='The URL of the MongoDB, format: mongodb://host:port',
14
- type=DatabaseParamType(),
15
- required=True)
16
- def processor_server_cli(processor_name: str, address: str, database: str):
17
- """
18
- Start Processor Server
19
- (standalone REST API OCR-D processor)
20
- """
21
- try:
22
- # Note, the address is already validated with the type field
23
- host, port = address.split(':')
24
- processor_server = ProcessorServer(
25
- mongodb_addr=database,
26
- processor_name=processor_name,
27
- processor_class=None # For readability purposes assigned here
28
- )
29
- processor_server.run_server(host=host, port=int(port))
30
- except Exception as e:
31
- raise Exception("Processor server has failed with error") from e
@@ -1,12 +0,0 @@
1
- from pydantic import BaseModel
2
- from typing import List, Optional
3
-
4
-
5
- class PYOcrdTool(BaseModel):
6
- executable: str
7
- categories: List[str]
8
- description: str
9
- input_file_grp: List[str]
10
- output_file_grp: Optional[List[str]]
11
- steps: List[str]
12
- parameters: Optional[dict] = None
@@ -1,255 +0,0 @@
1
- from datetime import datetime
2
- from os import getpid
3
- from subprocess import run as subprocess_run, PIPE
4
- from uvicorn import run
5
-
6
- from fastapi import APIRouter, BackgroundTasks, FastAPI, status
7
- from fastapi.responses import FileResponse
8
-
9
- from ocrd_utils import (
10
- initLogging,
11
- get_ocrd_tool_json,
12
- getLogger,
13
- parse_json_string_with_comments
14
- )
15
- from .constants import JobState, ServerApiTags
16
- from .database import (
17
- DBProcessorJob,
18
- db_get_workspace,
19
- db_update_processing_job,
20
- db_get_processing_job,
21
- initiate_database
22
- )
23
- from .logging_utils import (
24
- configure_file_handler_with_formatter,
25
- get_processor_server_logging_file_path,
26
- get_processing_job_logging_file_path
27
- )
28
- from .models import PYJobInput, PYJobOutput, PYOcrdTool
29
- from .process_helpers import invoke_processor
30
- from .rabbitmq_utils import OcrdResultMessage
31
- from .server_utils import (
32
- _get_processor_job,
33
- _get_processor_job_log,
34
- raise_http_exception,
35
- validate_and_return_mets_path,
36
- validate_job_input
37
- )
38
- from .utils import calculate_execution_time, post_to_callback_url, generate_id
39
-
40
-
41
- class ProcessorServer(FastAPI):
42
- def __init__(self, mongodb_addr: str, processor_name: str = "", processor_class=None):
43
- if not (processor_name or processor_class):
44
- raise ValueError("Either 'processor_name' or 'processor_class' must be provided")
45
- super().__init__(
46
- on_startup=[self.on_startup],
47
- on_shutdown=[self.on_shutdown],
48
- title=f"Network agent - Processor Server",
49
- description="Network agent - Processor Server"
50
- )
51
- initLogging()
52
- self.log = getLogger("ocrd_network.processor_server")
53
- log_file = get_processor_server_logging_file_path(processor_name=processor_name, pid=getpid())
54
- configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a")
55
-
56
- self.db_url = mongodb_addr
57
- self.processor_name = processor_name
58
- self.processor_class = processor_class
59
- self.ocrd_tool = None
60
- self.version = None
61
-
62
- self.version = self.get_version()
63
- self.ocrd_tool = self.get_ocrd_tool()
64
-
65
- if not self.ocrd_tool:
66
- raise Exception(f"The ocrd_tool is empty or missing")
67
-
68
- if not self.processor_name:
69
- self.processor_name = self.ocrd_tool["executable"]
70
-
71
- self.add_api_routes_processing()
72
- self.log.info(f"Initialized processor server: {processor_name}")
73
-
74
- async def on_startup(self):
75
- await initiate_database(db_url=self.db_url)
76
-
77
- async def on_shutdown(self) -> None:
78
- """
79
- TODO: Perform graceful shutdown operations here
80
- """
81
- pass
82
-
83
- def add_api_routes_processing(self):
84
- processing_router = APIRouter()
85
- processing_router.add_api_route(
86
- path="/info",
87
- endpoint=self.get_processor_info,
88
- methods=["GET"],
89
- tags=[ServerApiTags.PROCESSING],
90
- status_code=status.HTTP_200_OK,
91
- summary="Get information about this processor.",
92
- response_model=PYOcrdTool,
93
- response_model_exclude_unset=True,
94
- response_model_exclude_none=True
95
- )
96
- processing_router.add_api_route(
97
- path="/run",
98
- endpoint=self.create_processor_task,
99
- methods=["POST"],
100
- tags=[ServerApiTags.PROCESSING],
101
- status_code=status.HTTP_202_ACCEPTED,
102
- summary="Submit a job to this processor.",
103
- response_model=PYJobOutput,
104
- response_model_exclude_unset=True,
105
- response_model_exclude_none=True
106
- )
107
- processing_router.add_api_route(
108
- path="/job/{job_id}",
109
- endpoint=self.get_processor_job,
110
- methods=["GET"],
111
- tags=[ServerApiTags.PROCESSING],
112
- status_code=status.HTTP_200_OK,
113
- summary="Get information about a job based on its ID",
114
- response_model=PYJobOutput,
115
- response_model_exclude_unset=True,
116
- response_model_exclude_none=True
117
- )
118
- processing_router.add_api_route(
119
- path="/log/{job_id}",
120
- endpoint=self.get_processor_job_log,
121
- methods=["GET"],
122
- tags=[ServerApiTags.PROCESSING],
123
- status_code=status.HTTP_200_OK,
124
- summary="Get the log file of a job id"
125
- )
126
-
127
- async def get_processor_info(self):
128
- if not self.ocrd_tool:
129
- message = "Empty or missing ocrd tool json."
130
- raise_http_exception(self.log, status.HTTP_500_INTERNAL_SERVER_ERROR, message)
131
- return self.ocrd_tool
132
-
133
- # Note: The Processing server pushes to a queue, while
134
- # the Processor Server creates (pushes to) a background task
135
- async def create_processor_task(self, job_input: PYJobInput, background_tasks: BackgroundTasks):
136
- validate_job_input(self.log, self.processor_name, self.ocrd_tool, job_input)
137
- job_input.path_to_mets = await validate_and_return_mets_path(self.log, job_input)
138
-
139
- # The request is not forwarded from the Processing Server, assign a job_id
140
- if not job_input.job_id:
141
- job_id = generate_id()
142
- # Create a DB entry
143
- job = DBProcessorJob(
144
- **job_input.dict(exclude_unset=True, exclude_none=True),
145
- job_id=job_id,
146
- processor_name=self.processor_name,
147
- state=JobState.queued
148
- )
149
- await job.insert()
150
- else:
151
- job = await db_get_processing_job(job_input.job_id)
152
- # await self.run_processor_task(job=job)
153
- background_tasks.add_task(self.run_processor_task, job)
154
- return job.to_job_output()
155
-
156
- async def run_processor_task(self, job: DBProcessorJob):
157
- execution_failed = False
158
- start_time = datetime.now()
159
- job_log_file = get_processing_job_logging_file_path(job_id=job.job_id)
160
- await db_update_processing_job(
161
- job_id=job.job_id,
162
- state=JobState.running,
163
- start_time=start_time,
164
- log_file_path=job_log_file
165
- )
166
-
167
- mets_server_url = (await db_get_workspace(workspace_mets_path=job.path_to_mets)).mets_server_url
168
- try:
169
- invoke_processor(
170
- processor_class=self.processor_class,
171
- executable=self.processor_name,
172
- abs_path_to_mets=job.path_to_mets,
173
- input_file_grps=job.input_file_grps,
174
- output_file_grps=job.output_file_grps,
175
- page_id=job.page_id,
176
- parameters=job.parameters,
177
- mets_server_url=mets_server_url,
178
- log_filename=job_log_file,
179
- )
180
- except Exception as error:
181
- self.log.debug(f"processor_name: {self.processor_name}, path_to_mets: {job.path_to_mets}, "
182
- f"input_grps: {job.input_file_grps}, output_file_grps: {job.output_file_grps}, "
183
- f"page_id: {job.page_id}, parameters: {job.parameters}")
184
- self.log.exception(error)
185
- execution_failed = True
186
- end_time = datetime.now()
187
- exec_duration = calculate_execution_time(start_time, end_time)
188
- job_state = JobState.success if not execution_failed else JobState.failed
189
- await db_update_processing_job(
190
- job_id=job.job_id,
191
- state=job_state,
192
- end_time=end_time,
193
- exec_time=f"{exec_duration} ms"
194
- )
195
- result_message = OcrdResultMessage(
196
- job_id=job.job_id,
197
- state=job_state.value,
198
- path_to_mets=job.path_to_mets,
199
- # May not be always available
200
- workspace_id=job.workspace_id if job.workspace_id else ''
201
- )
202
- self.log.info(f"Result message: {result_message}")
203
- if job.callback_url:
204
- # If the callback_url field is set,
205
- # post the result message (callback to a user defined endpoint)
206
- post_to_callback_url(self.log, job.callback_url, result_message)
207
- if job.internal_callback_url:
208
- # If the internal callback_url field is set,
209
- # post the result message (callback to Processing Server endpoint)
210
- post_to_callback_url(self.log, job.internal_callback_url, result_message)
211
-
212
- def get_ocrd_tool(self):
213
- if self.ocrd_tool:
214
- return self.ocrd_tool
215
- if self.processor_class:
216
- # The way of accessing ocrd tool like in the line below may be problematic
217
- # ocrd_tool = self.processor_class(workspace=None, version=True).ocrd_tool
218
- ocrd_tool = parse_json_string_with_comments(
219
- subprocess_run(
220
- [self.processor_name, "--dump-json"],
221
- stdout=PIPE,
222
- check=True,
223
- universal_newlines=True
224
- ).stdout
225
- )
226
- else:
227
- ocrd_tool = get_ocrd_tool_json(self.processor_name)
228
- return ocrd_tool
229
-
230
- def get_version(self) -> str:
231
- if self.version:
232
- return self.version
233
-
234
- """
235
- if self.processor_class:
236
- # The way of accessing the version like in the line below may be problematic
237
- # version_str = self.processor_class(workspace=None, version=True).version
238
- return version_str
239
- """
240
- version_str = subprocess_run(
241
- [self.processor_name, "--version"],
242
- stdout=PIPE,
243
- check=True,
244
- universal_newlines=True
245
- ).stdout
246
- return version_str
247
-
248
- def run_server(self, host, port):
249
- run(self, host=host, port=port)
250
-
251
- async def get_processor_job(self, job_id: str) -> PYJobOutput:
252
- return await _get_processor_job(self.log, job_id)
253
-
254
- async def get_processor_job_log(self, job_id: str) -> FileResponse:
255
- return await _get_processor_job_log(self.log, job_id)
File without changes
File without changes