ocrd 3.0.0a2__tar.gz → 3.0.0b1__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (157)
  1. {ocrd-3.0.0a2/src/ocrd.egg-info → ocrd-3.0.0b1}/PKG-INFO +1 -1
  2. ocrd-3.0.0b1/VERSION +1 -0
  3. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/__init__.py +34 -26
  4. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/bashlib.py +32 -18
  5. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/ocrd_tool.py +7 -5
  6. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/workspace.py +10 -8
  7. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/decorators/__init__.py +13 -7
  8. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/lib.bash +2 -0
  9. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/mets_server.py +2 -3
  10. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/processor/base.py +163 -63
  11. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/processor/builtin/dummy_processor.py +4 -11
  12. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/processor/helpers.py +23 -17
  13. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/processor/ocrd_page_result.py +3 -3
  14. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/resolver.py +0 -3
  15. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/resource_manager.py +9 -5
  16. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/workspace.py +8 -9
  17. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/workspace_backup.py +1 -1
  18. {ocrd-3.0.0a2 → ocrd-3.0.0b1/src/ocrd.egg-info}/PKG-INFO +1 -1
  19. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd.egg-info/SOURCES.txt +1 -0
  20. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_modelfactory/__init__.py +1 -1
  21. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/constants.py +0 -1
  22. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/ocrd_exif.py +2 -2
  23. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/ocrd_file.py +2 -2
  24. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/ocrd_mets.py +22 -22
  25. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/ocrd_page.py +0 -1
  26. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/ocrd_xml_base.py +2 -2
  27. ocrd-3.0.0b1/src/ocrd_network/cli/client.py +203 -0
  28. ocrd-3.0.0b1/src/ocrd_network/client.py +63 -0
  29. ocrd-3.0.0b1/src/ocrd_network/client_utils.py +101 -0
  30. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/processing_server.py +1 -1
  31. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/runtime_data/deployer.py +12 -3
  32. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/server_utils.py +12 -10
  33. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/__init__.py +2 -0
  34. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/config.py +16 -2
  35. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/image.py +25 -25
  36. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/logging.py +17 -19
  37. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/os.py +4 -5
  38. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/str.py +10 -3
  39. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/json_validator.py +1 -3
  40. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/ocrd_tool_validator.py +2 -2
  41. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/page_validator.py +56 -56
  42. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/parameter_validator.py +2 -2
  43. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/resource_list_validator.py +4 -3
  44. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/workspace_validator.py +21 -21
  45. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/xsd_validator.py +1 -1
  46. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_workspace.py +1 -1
  47. ocrd-3.0.0a2/VERSION +0 -1
  48. ocrd-3.0.0a2/src/ocrd_network/cli/client.py +0 -99
  49. ocrd-3.0.0a2/src/ocrd_network/client.py +0 -37
  50. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/LICENSE +0 -0
  51. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/MANIFEST.in +0 -0
  52. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/README.md +0 -0
  53. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/README_bashlib.md +0 -0
  54. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/README_ocrd.md +0 -0
  55. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/README_ocrd_modelfactory.md +0 -0
  56. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/README_ocrd_models.md +0 -0
  57. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/README_ocrd_network.md +0 -0
  58. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/README_ocrd_utils.md +0 -0
  59. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/README_ocrd_validators.md +0 -0
  60. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/pyproject.toml +0 -0
  61. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/requirements.txt +0 -0
  62. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/setup.cfg +0 -0
  63. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/__init__.py +0 -0
  64. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/log.py +0 -0
  65. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/network.py +0 -0
  66. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/process.py +0 -0
  67. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/resmgr.py +0 -0
  68. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/validate.py +0 -0
  69. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/zip.py +0 -0
  70. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/constants.py +0 -0
  71. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/decorators/loglevel_option.py +0 -0
  72. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/decorators/mets_find_options.py +0 -0
  73. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/decorators/ocrd_cli_options.py +0 -0
  74. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/decorators/parameter_option.py +0 -0
  75. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/ocrd-all-tool.json +0 -0
  76. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/processor/__init__.py +0 -0
  77. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/processor/builtin/__init__.py +0 -0
  78. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/processor/builtin/dummy/__init__.py +0 -0
  79. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/processor/builtin/dummy/ocrd-tool.json +0 -0
  80. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/resource_list.yml +0 -0
  81. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/task_sequence.py +0 -0
  82. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/workspace_bagger.py +0 -0
  83. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd.egg-info/dependency_links.txt +0 -0
  84. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd.egg-info/entry_points.txt +0 -0
  85. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd.egg-info/requires.txt +0 -0
  86. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd.egg-info/top_level.txt +0 -0
  87. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/__init__.py +0 -0
  88. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/mets-empty.xml +0 -0
  89. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/ocrd_agent.py +0 -0
  90. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/ocrd_page_generateds.py +0 -0
  91. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/report.py +0 -0
  92. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/utils.py +0 -0
  93. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/__init__.py +0 -0
  94. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/cli/__init__.py +0 -0
  95. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/cli/processing_server.py +0 -0
  96. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/cli/processing_worker.py +0 -0
  97. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/cli/processor_server.py +0 -0
  98. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/constants.py +0 -0
  99. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/database.py +0 -0
  100. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/logging_utils.py +0 -0
  101. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/models/__init__.py +0 -0
  102. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/models/job.py +0 -0
  103. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/models/messages.py +0 -0
  104. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/models/ocrd_tool.py +0 -0
  105. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/models/workflow.py +0 -0
  106. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/models/workspace.py +0 -0
  107. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/param_validators.py +0 -0
  108. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/process_helpers.py +0 -0
  109. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/processing_worker.py +0 -0
  110. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/processor_server.py +0 -0
  111. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/rabbitmq_utils/__init__.py +0 -0
  112. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/rabbitmq_utils/connector.py +0 -0
  113. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/rabbitmq_utils/constants.py +0 -0
  114. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/rabbitmq_utils/consumer.py +0 -0
  115. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/rabbitmq_utils/helpers.py +0 -0
  116. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/rabbitmq_utils/ocrd_messages.py +0 -0
  117. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/rabbitmq_utils/publisher.py +0 -0
  118. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/runtime_data/__init__.py +0 -0
  119. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/runtime_data/config_parser.py +0 -0
  120. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/runtime_data/connection_clients.py +0 -0
  121. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/runtime_data/hosts.py +0 -0
  122. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/runtime_data/network_agents.py +0 -0
  123. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/runtime_data/network_services.py +0 -0
  124. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/server_cache.py +0 -0
  125. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/tcp_to_uds_mets_proxy.py +0 -0
  126. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/utils.py +0 -0
  127. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/constants.py +0 -0
  128. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/deprecate.py +0 -0
  129. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/introspect.py +0 -0
  130. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/ocrd_logging.conf +0 -0
  131. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/__init__.py +0 -0
  132. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/bagit-profile.yml +0 -0
  133. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/constants.py +0 -0
  134. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/message_processing.schema.yml +0 -0
  135. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/message_result.schema.yml +0 -0
  136. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/mets.xsd +0 -0
  137. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/ocrd_network_message_validator.py +0 -0
  138. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/ocrd_tool.schema.yml +0 -0
  139. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/ocrd_zip_validator.py +0 -0
  140. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/page.xsd +0 -0
  141. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/processing_server_config.schema.yml +0 -0
  142. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/processing_server_config_validator.py +0 -0
  143. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/xlink.xsd +0 -0
  144. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/xsd_mets_validator.py +0 -0
  145. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/xsd_page_validator.py +0 -0
  146. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_decorators.py +0 -0
  147. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_logging.py +0 -0
  148. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_logging_conf.py +0 -0
  149. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_mets_server.py +0 -0
  150. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_model_factory.py +0 -0
  151. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_resolver.py +0 -0
  152. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_resolver_oai.py +0 -0
  153. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_resource_manager.py +0 -0
  154. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_task_sequence.py +0 -0
  155. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_utils.py +0 -0
  156. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_version.py +0 -0
  157. {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_workspace_remove.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocrd
3
- Version: 3.0.0a2
3
+ Version: 3.0.0b1
4
4
  Summary: OCR-D framework
5
5
  Author-email: Konstantin Baierer <unixprog@gmail.com>
6
6
  License: Apache License 2.0
ocrd-3.0.0b1/VERSION ADDED
@@ -0,0 +1 @@
1
+ 3.0.0b1
@@ -10,6 +10,36 @@ import click
10
10
 
11
11
  from ocrd_utils import config
12
12
 
13
+ # pylint: disable=wrong-import-position
14
+
15
+ def command_with_replaced_help(*replacements):
16
+
17
+ class CommandWithReplacedHelp(click.Command):
18
+ def get_help(self, ctx):
19
+ newhelp = super().get_help(ctx)
20
+ for replacement in replacements:
21
+ newhelp = re.sub(*replacement, newhelp)
22
+ # print(newhelp)
23
+ return newhelp
24
+
25
+ return CommandWithReplacedHelp
26
+
27
+ # pylint: enable=wrong-import-position
28
+
29
+ from ..decorators import ocrd_loglevel
30
+ from .ocrd_tool import ocrd_tool_cli
31
+ from .workspace import workspace_cli
32
+ from .process import process_cli
33
+ from .bashlib import bashlib_cli
34
+ from .validate import validate_cli
35
+ from .resmgr import resmgr_cli
36
+ from .zip import zip_cli
37
+ from .log import log_cli
38
+ from .network import network_cli
39
+
40
+
41
+ __all__ = ['cli']
42
+
13
43
  _epilog = f"""
14
44
 
15
45
  \b
@@ -41,6 +71,10 @@ Variables:
41
71
  \b
42
72
  {config.describe('OCRD_MAX_PROCESSOR_CACHE')}
43
73
  \b
74
+ {config.describe('OCRD_NETWORK_CLIENT_POLLING_SLEEP')}
75
+ \b
76
+ {config.describe('OCRD_NETWORK_CLIENT_POLLING_TIMEOUT')}
77
+ \b
44
78
  {config.describe('OCRD_NETWORK_SERVER_ADDR_PROCESSING')}
45
79
  \b
46
80
  {config.describe('OCRD_NETWORK_SERVER_ADDR_WORKFLOW')}
@@ -60,30 +94,6 @@ Variables:
60
94
  {config.describe('OCRD_LOGGING_DEBUG')}
61
95
  """
62
96
 
63
- def command_with_replaced_help(*replacements):
64
-
65
- class CommandWithReplacedHelp(click.Command):
66
- def get_help(self, ctx):
67
- help = super().get_help(ctx)
68
- for replacement in replacements:
69
- help = re.sub(*replacement, help)
70
- # print(help)
71
- return help
72
-
73
- return CommandWithReplacedHelp
74
-
75
-
76
- from ..decorators import ocrd_loglevel
77
- from .ocrd_tool import ocrd_tool_cli
78
- from .workspace import workspace_cli
79
- from .process import process_cli
80
- from .bashlib import bashlib_cli
81
- from .validate import validate_cli
82
- from .resmgr import resmgr_cli
83
- from .zip import zip_cli
84
- from .log import log_cli
85
- from .network import network_cli
86
-
87
97
  @click.group(epilog=_epilog)
88
98
  @click.version_option(package_name='ocrd')
89
99
  @ocrd_loglevel
@@ -101,5 +111,3 @@ cli.add_command(validate_cli)
101
111
  cli.add_command(log_cli)
102
112
  cli.add_command(resmgr_cli)
103
113
  cli.add_command(network_cli)
104
-
105
- __all__ = ['cli']
@@ -8,7 +8,6 @@ OCR-D CLI: bash library
8
8
  """
9
9
  from __future__ import print_function
10
10
  import sys
11
- from os.path import isfile
12
11
  import click
13
12
 
14
13
  from ocrd.constants import BASHLIB_FILENAME
@@ -23,15 +22,7 @@ from ocrd.decorators import (
23
22
  ocrd_loglevel,
24
23
  ocrd_cli_wrap_processor
25
24
  )
26
- from ocrd_utils import (
27
- is_local_filename,
28
- get_local_filename,
29
- initLogging,
30
- getLogger,
31
- make_file_id,
32
- config
33
- )
34
- from ocrd.resolver import Resolver
25
+ from ocrd_utils import make_file_id
35
26
  from ocrd.processor import Processor
36
27
 
37
28
  # ----------------------------------------------------------------------
@@ -82,6 +73,8 @@ def bashlib_constants(name):
82
73
  print(val)
83
74
 
84
75
  @bashlib_cli.command('input-files')
76
+ @click.option('--ocrd-tool', help="path to ocrd-tool.json of processor to feed", default=None)
77
+ @click.option('--executable', help="name of processor executable in ocrd-tool.json", default=None)
85
78
  @click.option('-m', '--mets', help="METS to process", default=DEFAULT_METS_BASENAME)
86
79
  @click.option('-w', '--working-dir', help="Working Directory")
87
80
  @click.option('-I', '--input-file-grp', help='File group(s) used as input.', default=None)
@@ -96,7 +89,7 @@ def bashlib_constants(name):
96
89
  @parameter_option
97
90
  @parameter_override_option
98
91
  @ocrd_loglevel
99
- def bashlib_input_files(**kwargs):
92
+ def bashlib_input_files(ocrd_tool, executable, **kwargs):
100
93
  """
101
94
  List input files for processing
102
95
 
@@ -108,12 +101,6 @@ def bashlib_input_files(**kwargs):
108
101
  (The printing format is one associative array initializer per line.)
109
102
  """
110
103
  class BashlibProcessor(Processor):
111
- @property
112
- def ocrd_tool(self):
113
- return {'executable': '', 'steps': ['']}
114
- @property
115
- def version(self):
116
- return '1.0'
117
104
  # go half way of the normal run_processor / process_workspace call tree
118
105
  # by just delegating to process_workspace, overriding process_page_file
119
106
  # to ensure all input files exist locally (without persisting them in the METS)
@@ -129,4 +116,31 @@ def bashlib_input_files(**kwargs):
129
116
  print(f"[{field}]='{value}'", end=' ')
130
117
  output_file_id = make_file_id(input_files[0], kwargs['output_file_grp'])
131
118
  print(f"[outputFileId]='{output_file_id}'")
132
- ocrd_cli_wrap_processor(BashlibProcessor, **kwargs)
119
+ if ocrd_tool and executable:
120
+ class FullBashlibProcessor(BashlibProcessor):
121
+ @property
122
+ def metadata_location(self):
123
+ # needed for metadata loading and validation mechanism
124
+ return ocrd_tool
125
+ @property
126
+ def executable(self):
127
+ # needed for ocrd_tool lookup
128
+ return executable
129
+ else:
130
+ # we have no true metadata file, so fill in just to make it work
131
+ class FullBashlibProcessor(BashlibProcessor):
132
+ @property
133
+ def ocrd_tool(self):
134
+ # needed to satisfy the validator
135
+ return {'executable': '',
136
+ # required now
137
+ 'input_file_grp_cardinality': 1,
138
+ 'output_file_grp_cardinality': 1,
139
+ 'steps': ['']
140
+ }
141
+ @property
142
+ def version(self):
143
+ # needed to satisfy the validator and wrapper
144
+ return '1.0'
145
+
146
+ ocrd_cli_wrap_processor(FullBashlibProcessor, **kwargs)
@@ -17,7 +17,6 @@ from ocrd.decorators import parameter_option, parameter_override_option
17
17
  from ocrd.processor import Processor
18
18
  from ocrd_utils import (
19
19
  set_json_key_value_overrides,
20
- VERSION as OCRD_VERSION,
21
20
  parse_json_string_or_file,
22
21
  parse_json_string_with_comments as loads
23
22
  )
@@ -29,23 +28,26 @@ class OcrdToolCtx():
29
28
  self.filename = filename
30
29
  with codecs.open(filename, encoding='utf-8') as f:
31
30
  self.content = f.read()
31
+ # perhaps the validator should _always_ run (for default expansion)
32
+ # so validate command only for the report?
32
33
  self.json = loads(self.content)
34
+ self.tool_name = ''
33
35
 
34
36
  class BashProcessor(Processor):
35
37
  @property
36
- def metadata(inner_self):
38
+ def metadata(inner_self): # pylint: disable=no-self-argument,arguments-renamed
37
39
  return self.json
38
40
  @property
39
- def executable(inner_self):
41
+ def executable(inner_self): # pylint: disable=no-self-argument,arguments-renamed
40
42
  return self.tool_name
41
43
  @property
42
- def moduledir(inner_self):
44
+ def moduledir(inner_self): # pylint: disable=no-self-argument,arguments-renamed
43
45
  return os.path.dirname(self.filename)
44
46
  # set docstrings to empty
45
47
  __doc__ = None
46
48
  # HACK: override the module-level docstring, too
47
49
  getmodule(OcrdToolCtx).__doc__ = None
48
- def process(inner_self):
50
+ def process(inner_self): # pylint: disable=no-self-argument,arguments-renamed
49
51
  return super()
50
52
 
51
53
  self.processor = BashProcessor
@@ -6,7 +6,7 @@ OCR-D CLI: workspace management
6
6
  :nested: full
7
7
  """
8
8
  import os
9
- from os import getcwd, rmdir, unlink
9
+ from os import rmdir, unlink
10
10
  from os.path import dirname, relpath, normpath, exists, join, isabs, isdir
11
11
  from pathlib import Path
12
12
  from json import loads, dumps
@@ -14,7 +14,6 @@ import sys
14
14
  from glob import glob # XXX pathlib.Path.glob does not support absolute globs
15
15
  import re
16
16
  import time
17
- import numpy as np
18
17
 
19
18
  import click
20
19
 
@@ -118,7 +117,7 @@ def workspace_validate(ctx, mets_url, download, skip, page_textequiv_consistency
118
117
  @workspace_cli.command('clone', cls=command_with_replaced_help(
119
118
  (r' \[WORKSPACE_DIR\]', ''))) # XXX deprecated argument
120
119
  @click.option('-f', '--clobber-mets', help="Overwrite existing METS file", default=False, is_flag=True)
121
- @click.option('-a', '--download', is_flag=True, help="Download all files and change location in METS file after cloning")
120
+ @click.option('-a', '--download', is_flag=True, help="Download all selected files and add local path references in METS file afterwards")
122
121
  @click.argument('mets_url')
123
122
  @mets_find_options
124
123
  # XXX deprecated
@@ -129,8 +128,10 @@ def workspace_clone(ctx, clobber_mets, download, file_grp, file_id, page_id, mim
129
128
  Create a workspace from METS_URL and return the directory
130
129
 
131
130
  METS_URL can be a URL, an absolute path or a path relative to $PWD.
132
- If METS_URL is not provided, use --mets accordingly.
133
131
  METS_URL can also be an OAI-PMH GetRecord URL wrapping a METS file.
132
+
133
+ Additional options pertain to the selection of files / fileGrps / pages
134
+ to be downloaded, if --download is used.
134
135
  """
135
136
  LOG = getLogger('ocrd.cli.workspace.clone')
136
137
  if workspace_dir:
@@ -143,6 +144,7 @@ def workspace_clone(ctx, clobber_mets, download, file_grp, file_id, page_id, mim
143
144
  mets_basename=ctx.mets_basename,
144
145
  clobber_mets=clobber_mets,
145
146
  download=download,
147
+ fileGrp=file_grp,
146
148
  ID=file_id,
147
149
  pageId=page_id,
148
150
  mimetype=mimetype,
@@ -408,7 +410,7 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
408
410
  if dry_run:
409
411
  log.info('workspace.add_file(%s)' % file_dict)
410
412
  else:
411
- workspace.add_file(fileGrp, ignore=ignore, force=force, **file_dict)
413
+ workspace.add_file(fileGrp, ignore=ignore, force=force, **file_dict) # pylint: disable=redundant-keyword-arg
412
414
 
413
415
  # save changes to disk
414
416
  workspace.save_mets()
@@ -452,7 +454,7 @@ def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field, incl
452
454
  snake_to_camel = {"file_id": "ID", "page_id": "pageId", "file_grp": "fileGrp"}
453
455
  output_field = [snake_to_camel.get(x, x) for x in output_field]
454
456
  modified_mets = False
455
- ret = list()
457
+ ret = []
456
458
  workspace = Workspace(
457
459
  ctx.resolver,
458
460
  directory=ctx.directory,
@@ -748,7 +750,7 @@ def set_id(ctx, id): # pylint: disable=redefined-builtin
748
750
 
749
751
  @workspace_cli.command('update-page')
750
752
  @click.option('--set', 'attr_value_pairs', help=f"set mets:div ATTR to VALUE. possible keys: {METS_PAGE_DIV_ATTRIBUTE.names()}", metavar="ATTR VALUE", nargs=2, multiple=True)
751
- @click.option('--order', help="[DEPRECATED - use --set ATTR VALUE", metavar='ORDER')
753
+ @click.option('--order', help="[DEPRECATED - use --set ATTR VALUE", metavar='ORDER')
752
754
  @click.option('--orderlabel', help="DEPRECATED - use --set ATTR VALUE", metavar='ORDERLABEL')
753
755
  @click.option('--contentids', help="DEPRECATED - use --set ATTR VALUE", metavar='ORDERLABEL')
754
756
  @click.argument('PAGE_ID')
@@ -757,7 +759,7 @@ def update_page(ctx, attr_value_pairs, order, orderlabel, contentids, page_id):
757
759
  """
758
760
  Update the @ID, @ORDER, @ORDERLABEL, @LABEL or @CONTENTIDS attributes of the mets:div with @ID=PAGE_ID
759
761
  """
760
- update_kwargs = {k: v for k, v in attr_value_pairs}
762
+ update_kwargs = dict(attr_value_pairs)
761
763
  if order:
762
764
  update_kwargs['ORDER'] = order
763
765
  if orderlabel:
@@ -1,4 +1,5 @@
1
1
  import sys
2
+ from contextlib import nullcontext
2
3
 
3
4
  from ocrd_utils import (
4
5
  config,
@@ -9,6 +10,7 @@ from ocrd_utils import (
9
10
  parse_json_string_with_comments,
10
11
  set_json_key_value_overrides,
11
12
  parse_json_string_or_file,
13
+ redirect_stderr_and_stdout_to_file,
12
14
  )
13
15
  from ocrd_validators import WorkspaceValidator
14
16
  from ocrd_network import ProcessingWorker, ProcessorServer, AgentType
@@ -104,10 +106,10 @@ def ocrd_cli_wrap_processor(
104
106
  kwargs['parameter'] = parse_json_string_or_file(*kwargs['parameter'],
105
107
  resolve_preset_file=resolve)
106
108
  else:
107
- kwargs['parameter'] = dict()
109
+ kwargs['parameter'] = {}
108
110
  # Merge parameter overrides and parameters
109
111
  if 'parameter_override' in kwargs:
110
- set_json_key_value_overrides(kwargs['parameter'], *kwargs['parameter_override'])
112
+ set_json_key_value_overrides(kwargs['parameter'], *kwargs.pop('parameter_override'))
111
113
  # Assert -I / -O
112
114
  if not kwargs['input_file_grp']:
113
115
  raise ValueError('-I/--input-file-grp is required')
@@ -140,17 +142,21 @@ def ocrd_cli_wrap_processor(
140
142
  print("Profiling...")
141
143
  pr = cProfile.Profile()
142
144
  pr.enable()
143
- def exit():
145
+ def goexit():
144
146
  pr.disable()
145
147
  print("Profiling completed")
146
148
  if profile_file:
147
- with open(profile_file, 'wb') as f:
148
- pr.dump_stats(profile_file)
149
+ pr.dump_stats(profile_file)
149
150
  s = io.StringIO()
150
151
  pstats.Stats(pr, stream=s).sort_stats("cumulative").print_stats()
151
152
  print(s.getvalue())
152
- atexit.register(exit)
153
- run_processor(processorClass, mets_url=mets, workspace=workspace, **kwargs)
153
+ atexit.register(goexit)
154
+ if log_filename:
155
+ log_ctx = redirect_stderr_and_stdout_to_file(log_filename)
156
+ else:
157
+ log_ctx = nullcontext()
158
+ with log_ctx:
159
+ run_processor(processorClass, mets_url=mets, workspace=workspace, **kwargs)
154
160
 
155
161
 
156
162
  def check_and_run_network_agent(ProcessorClass, subcommand: str, address: str, database: str, queue: str):
@@ -299,6 +299,8 @@ ocrd__wrap () {
299
299
  eval "ocrd__files[$i]=ocrd__file$i"
300
300
  let ++i
301
301
  done < <(ocrd bashlib input-files \
302
+ --ocrd-tool $OCRD_TOOL_JSON \
303
+ --executable $OCRD_TOOL_NAME \
302
304
  -m "${ocrd__argv[mets_file]}" \
303
305
  -I "${ocrd__argv[input_file_grp]}" \
304
306
  -O "${ocrd__argv[output_file_grp]}" \
@@ -21,7 +21,7 @@ from pydantic import BaseModel, Field, ValidationError
21
21
  import uvicorn
22
22
 
23
23
  from ocrd_models import OcrdFile, ClientSideOcrdFile, OcrdAgent, ClientSideOcrdAgent
24
- from ocrd_utils import getLogger, deprecated_alias
24
+ from ocrd_utils import getLogger
25
25
 
26
26
 
27
27
  #
@@ -236,7 +236,7 @@ class ClientSideOcrdMets:
236
236
  agent_dict["_type"] = agent_dict.pop("type")
237
237
  return [ClientSideOcrdAgent(None, **agent_dict) for agent_dict in agent_dicts]
238
238
 
239
- def add_agent(self, *args, **kwargs):
239
+ def add_agent(self, **kwargs):
240
240
  if not self.multiplexing_mode:
241
241
  return self.session.request("POST", f"{self.url}/agent", json=OcrdAgentModel.create(**kwargs).dict())
242
242
  else:
@@ -403,7 +403,6 @@ class OcrdMetsServer:
403
403
  @staticmethod
404
404
  def kill_process(mets_server_pid: int):
405
405
  subprocess_run(args=["kill", "-s", "SIGINT", f"{mets_server_pid}"], shell=False, universal_newlines=True)
406
- return
407
406
 
408
407
  def shutdown(self):
409
408
  if self.is_uds: