ocrd 3.0.0b7__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocrd/cli/__init__.py +3 -1
- ocrd/decorators/__init__.py +3 -2
- ocrd/mets_server.py +62 -42
- ocrd/processor/base.py +7 -6
- ocrd/processor/builtin/dummy/ocrd-tool.json +20 -0
- ocrd/processor/builtin/dummy_processor.py +0 -3
- ocrd/processor/builtin/filter_processor.py +108 -0
- ocrd/resource_manager.py +4 -0
- {ocrd-3.0.0b7.dist-info → ocrd-3.0.1.dist-info}/METADATA +2 -1
- {ocrd-3.0.0b7.dist-info → ocrd-3.0.1.dist-info}/RECORD +32 -31
- {ocrd-3.0.0b7.dist-info → ocrd-3.0.1.dist-info}/entry_points.txt +1 -0
- ocrd_modelfactory/__init__.py +7 -1
- ocrd_models/ocrd_exif.py +2 -2
- ocrd_models/ocrd_page.py +22 -3
- ocrd_models/ocrd_page_generateds.py +2813 -1438
- ocrd_models/xpath_functions.py +51 -0
- ocrd_network/cli/client.py +27 -8
- ocrd_network/client.py +9 -6
- ocrd_network/client_utils.py +25 -14
- ocrd_network/processing_server.py +27 -15
- ocrd_network/processing_worker.py +7 -4
- ocrd_network/processor_server.py +2 -1
- ocrd_network/rabbitmq_utils/connector.py +2 -2
- ocrd_network/runtime_data/deployer.py +28 -18
- ocrd_network/server_cache.py +26 -23
- ocrd_network/server_utils.py +40 -4
- ocrd_network/tcp_to_uds_mets_proxy.py +8 -5
- ocrd_network/utils.py +19 -15
- ocrd_utils/config.py +38 -16
- ocrd/processor/concurrent.py +0 -909
- {ocrd-3.0.0b7.dist-info → ocrd-3.0.1.dist-info}/LICENSE +0 -0
- {ocrd-3.0.0b7.dist-info → ocrd-3.0.1.dist-info}/WHEEL +0 -0
- {ocrd-3.0.0b7.dist-info → ocrd-3.0.1.dist-info}/top_level.txt +0 -0
ocrd_modelfactory/__init__.py
CHANGED
|
@@ -101,5 +101,11 @@ def page_from_file(input_file, **kwargs) -> OcrdPage:
|
|
|
101
101
|
if input_file.mimetype.startswith('image'):
|
|
102
102
|
return page_from_image(input_file)
|
|
103
103
|
if input_file.mimetype == MIMETYPE_PAGE:
|
|
104
|
-
|
|
104
|
+
revmap = {}
|
|
105
|
+
# the old/default gds.reverse_node_mapping is useless
|
|
106
|
+
# since 2.39.4, we can actually get the exact reverse mapping for perfect round-trip
|
|
107
|
+
# but awkwardly, we have to pass the dict in for that
|
|
108
|
+
page = OcrdPage(*parseEtree(input_file.local_filename, reverse_mapping=revmap, silence=True))
|
|
109
|
+
page.revmap = revmap
|
|
110
|
+
return page
|
|
105
111
|
raise ValueError("Unsupported mimetype '%s'" % input_file.mimetype)
|
ocrd_models/ocrd_exif.py
CHANGED
|
@@ -49,11 +49,11 @@ class OcrdExif():
|
|
|
49
49
|
for prop in ['compression', 'photometric_interpretation']:
|
|
50
50
|
setattr(self, prop, img.info[prop] if prop in img.info else None)
|
|
51
51
|
if img.filename:
|
|
52
|
-
ret = run(['identify', '-format', r'%[resolution.x] %[resolution.y] %U', img.filename], check=False, stderr=PIPE, stdout=PIPE)
|
|
52
|
+
ret = run(['identify', '-format', r'%[resolution.x] %[resolution.y] %U ', img.filename], check=False, stderr=PIPE, stdout=PIPE)
|
|
53
53
|
else:
|
|
54
54
|
with BytesIO() as bio:
|
|
55
55
|
img.save(bio, format=img.format)
|
|
56
|
-
ret = run(['identify', '-format', r'%[resolution.x] %[resolution.y] %U', '/dev/stdin'], check=False, stderr=PIPE, stdout=PIPE, input=bio.getvalue())
|
|
56
|
+
ret = run(['identify', '-format', r'%[resolution.x] %[resolution.y] %U ', '/dev/stdin'], check=False, stderr=PIPE, stdout=PIPE, input=bio.getvalue())
|
|
57
57
|
if ret.returncode:
|
|
58
58
|
stderr = ret.stderr.decode('utf-8')
|
|
59
59
|
if 'no decode delegate for this image format' in stderr:
|
ocrd_models/ocrd_page.py
CHANGED
|
@@ -2,8 +2,9 @@
|
|
|
2
2
|
API to PAGE-XML, generated with generateDS from XML schema.
|
|
3
3
|
"""
|
|
4
4
|
from io import StringIO
|
|
5
|
-
from typing import Dict, Union
|
|
5
|
+
from typing import Dict, Union, Any
|
|
6
6
|
from lxml import etree as ET
|
|
7
|
+
from elementpath import XPath2Parser, XPathContext
|
|
7
8
|
|
|
8
9
|
__all__ = [
|
|
9
10
|
'parse',
|
|
@@ -132,6 +133,7 @@ from .ocrd_page_generateds import (
|
|
|
132
133
|
)
|
|
133
134
|
|
|
134
135
|
from .constants import NAMESPACES
|
|
136
|
+
from .xpath_functions import pc_functions
|
|
135
137
|
|
|
136
138
|
# add docstrings
|
|
137
139
|
parse.__doc__ = (
|
|
@@ -189,12 +191,25 @@ class OcrdPage():
|
|
|
189
191
|
pcgts : PcGtsType,
|
|
190
192
|
etree : ET._Element,
|
|
191
193
|
mapping : Dict[str, ET._Element],
|
|
192
|
-
revmap : Dict[ET._Element,
|
|
194
|
+
revmap : Dict[ET._Element, Any],
|
|
193
195
|
):
|
|
194
196
|
self._pcgts = pcgts
|
|
195
197
|
self.etree = etree
|
|
196
198
|
self.mapping = mapping
|
|
197
199
|
self.revmap = revmap
|
|
200
|
+
self.xpath_parser = XPath2Parser(namespaces={
|
|
201
|
+
'page': NAMESPACES['page'],
|
|
202
|
+
'pc': NAMESPACES['page']})
|
|
203
|
+
for func in pc_functions:
|
|
204
|
+
name = func.__name__.replace('_', '-')
|
|
205
|
+
if name.startswith('pc-'):
|
|
206
|
+
name = name[3:]
|
|
207
|
+
elif name.startswith('pc'):
|
|
208
|
+
name = name[2:]
|
|
209
|
+
# register
|
|
210
|
+
self.xpath_parser.external_function(func, name=name, prefix='pc')
|
|
211
|
+
self.xpath_context = XPathContext(self.etree)
|
|
212
|
+
self.xpath = lambda expression: self.xpath_parser.parse(expression).get_results(self.xpath_context)
|
|
198
213
|
|
|
199
214
|
def __getattr__(self, name):
|
|
200
215
|
return getattr(self._pcgts, name)
|
|
@@ -208,11 +223,15 @@ def to_xml(el, skip_declaration=False) -> str:
|
|
|
208
223
|
# XXX remove potential empty ReadingOrder
|
|
209
224
|
if hasattr(el, 'prune_ReadingOrder'):
|
|
210
225
|
el.prune_ReadingOrder()
|
|
226
|
+
if hasattr(el, 'original_tagname_'):
|
|
227
|
+
name = el.original_tagname_ or 'PcGts'
|
|
228
|
+
else:
|
|
229
|
+
name = 'PcGts'
|
|
211
230
|
sio = StringIO()
|
|
212
231
|
el.export(
|
|
213
232
|
outfile=sio,
|
|
214
233
|
level=0,
|
|
215
|
-
name_=
|
|
234
|
+
name_=name,
|
|
216
235
|
namespaceprefix_='pc:',
|
|
217
236
|
namespacedef_='xmlns:pc="%s" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="%s %s/pagecontent.xsd"' % (
|
|
218
237
|
NAMESPACES['page'],
|