ocrd 3.8.0__py3-none-any.whl → 3.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocrd/ocrd-all-tool.json +25 -0
- ocrd/processor/base.py +51 -43
- ocrd/processor/ocrd_page_result.py +74 -0
- {ocrd-3.8.0.dist-info → ocrd-3.9.0.dist-info}/METADATA +1 -1
- {ocrd-3.8.0.dist-info → ocrd-3.9.0.dist-info}/RECORD +16 -16
- ocrd_models/ocrd_page.py +20 -0
- ocrd_models/ocrd_page_generateds.py +1273 -69
- ocrd_network/cli/client.py +1 -1
- ocrd_network/client.py +4 -0
- ocrd_network/client_utils.py +9 -2
- ocrd_network/constants.py +1 -1
- ocrd_network/rabbitmq_utils/helpers.py +1 -1
- {ocrd-3.8.0.dist-info → ocrd-3.9.0.dist-info}/LICENSE +0 -0
- {ocrd-3.8.0.dist-info → ocrd-3.9.0.dist-info}/WHEEL +0 -0
- {ocrd-3.8.0.dist-info → ocrd-3.9.0.dist-info}/entry_points.txt +0 -0
- {ocrd-3.8.0.dist-info → ocrd-3.9.0.dist-info}/top_level.txt +0 -0
ocrd/ocrd-all-tool.json
CHANGED
|
@@ -41,5 +41,30 @@
|
|
|
41
41
|
"description": "Whether to extract an image for each filtered segment and write to the output fileGrp."
|
|
42
42
|
}
|
|
43
43
|
}
|
|
44
|
+
},
|
|
45
|
+
"ocrd-command": {
|
|
46
|
+
"executable": "ocrd-command",
|
|
47
|
+
"description": "Bare-bones processor runs shell commands to process PAGE files",
|
|
48
|
+
"steps": ["recognition/text-recognition", "recognition/font-identification", "recognition/post-correction", "layout/segmentation", "layout/analysis"],
|
|
49
|
+
"categories": [],
|
|
50
|
+
"input_file_grp_cardinality": [1, -1],
|
|
51
|
+
"output_file_grp_cardinality": 1,
|
|
52
|
+
"parameters": {
|
|
53
|
+
"command": {
|
|
54
|
+
"type": "string",
|
|
55
|
+
"default": "cat @INFILE > @OUTFILE",
|
|
56
|
+
"description": "Shell command to operate on PAGE files, with @INFILE as place-holder for the input file path(s), and @OUTFILE as place-holder for the output file path. If running on multiple input fileGrps, then @INFILE must be repeated as many times."
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
},
|
|
60
|
+
"ocrd-merge": {
|
|
61
|
+
"executable": "ocrd-merge",
|
|
62
|
+
"description": "Bare-bones processor merges annotations from multiple fileGrps",
|
|
63
|
+
"steps": ["layout/segmentation"],
|
|
64
|
+
"categories": [],
|
|
65
|
+
"input_file_grp_cardinality": [1, -1],
|
|
66
|
+
"output_file_grp_cardinality": 1,
|
|
67
|
+
"parameters": {
|
|
68
|
+
}
|
|
44
69
|
}
|
|
45
70
|
}
|
ocrd/processor/base.py
CHANGED
|
@@ -824,51 +824,59 @@ class Processor():
|
|
|
824
824
|
if not any(input_pcgts):
|
|
825
825
|
self._base_logger.warning(f'skipping page {page_id}')
|
|
826
826
|
return
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
image_result.
|
|
848
|
-
|
|
849
|
-
image_result.alternative_image
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
image_result.
|
|
857
|
-
|
|
858
|
-
|
|
827
|
+
output_file_grps = self.output_file_grp.split(',')
|
|
828
|
+
output_file_ids = [make_file_id(input_files[input_pos], output_file_grp)
|
|
829
|
+
if input_files[input_pos].fileGrp != output_file_grp else
|
|
830
|
+
# input=output fileGrp: re-use ID exactly
|
|
831
|
+
input_files[input_pos].ID
|
|
832
|
+
for output_file_grp in output_file_grps]
|
|
833
|
+
if config.OCRD_EXISTING_OUTPUT != 'OVERWRITE':
|
|
834
|
+
for output_file_id in output_file_ids:
|
|
835
|
+
if output_file := next(self.workspace.mets.find_files(ID=output_file_id), None):
|
|
836
|
+
# short-cut avoiding useless computation:
|
|
837
|
+
raise FileExistsError(
|
|
838
|
+
f"A file with ID=={output_file_id} already exists {output_file}"
|
|
839
|
+
" and OCRD_EXISTING_OUTPUT != OVERWRITE"
|
|
840
|
+
)
|
|
841
|
+
results = self.process_page_pcgts(*input_pcgts, page_id=page_id)
|
|
842
|
+
if len(results) > len(output_file_grps):
|
|
843
|
+
self._base_logger.error(f"processor returned {len(results) - len(output_file_grps)} "
|
|
844
|
+
f"more results than specified output fileGrps for page {page_id}")
|
|
845
|
+
for result, output_file_id, output_file_grp in zip(results, output_file_ids, output_file_grps):
|
|
846
|
+
for image_result in result.images:
|
|
847
|
+
image_file_id = f'{output_file_id}_{image_result.file_id_suffix}'
|
|
848
|
+
image_file_path = join(output_file_grp, f'{image_file_id}.png')
|
|
849
|
+
if isinstance(image_result.alternative_image, PageType):
|
|
850
|
+
# special case: not an alternative image, but replacing the original image
|
|
851
|
+
# (this is needed by certain processors when the original's coordinate system
|
|
852
|
+
# cannot or must not be kept, e.g. dewarping)
|
|
853
|
+
image_result.alternative_image.set_imageFilename(image_file_path)
|
|
854
|
+
image_result.alternative_image.set_imageWidth(image_result.pil.width)
|
|
855
|
+
image_result.alternative_image.set_imageHeight(image_result.pil.height)
|
|
856
|
+
elif isinstance(image_result.alternative_image, AlternativeImageType):
|
|
857
|
+
image_result.alternative_image.set_filename(image_file_path)
|
|
858
|
+
elif image_result.alternative_image is None:
|
|
859
|
+
pass # do not reference in PAGE result
|
|
860
|
+
else:
|
|
861
|
+
raise ValueError(f"process_page_pcgts returned an OcrdPageResultImage of unknown type "
|
|
862
|
+
f"{type(image_result.alternative_image)}")
|
|
863
|
+
self.workspace.save_image_file(
|
|
864
|
+
image_result.pil,
|
|
865
|
+
image_file_id,
|
|
866
|
+
output_file_grp,
|
|
867
|
+
page_id=page_id,
|
|
868
|
+
file_path=image_file_path,
|
|
869
|
+
)
|
|
870
|
+
result.pcgts.set_pcGtsId(output_file_id)
|
|
871
|
+
self.add_metadata(result.pcgts)
|
|
872
|
+
self.workspace.add_file(
|
|
873
|
+
file_id=output_file_id,
|
|
874
|
+
file_grp=output_file_grp,
|
|
859
875
|
page_id=page_id,
|
|
860
|
-
|
|
876
|
+
local_filename=os.path.join(output_file_grp, output_file_id + '.xml'),
|
|
877
|
+
mimetype=MIMETYPE_PAGE,
|
|
878
|
+
content=to_xml(result.pcgts),
|
|
861
879
|
)
|
|
862
|
-
result.pcgts.set_pcGtsId(output_file_id)
|
|
863
|
-
self.add_metadata(result.pcgts)
|
|
864
|
-
self.workspace.add_file(
|
|
865
|
-
file_id=output_file_id,
|
|
866
|
-
file_grp=self.output_file_grp,
|
|
867
|
-
page_id=page_id,
|
|
868
|
-
local_filename=os.path.join(self.output_file_grp, output_file_id + '.xml'),
|
|
869
|
-
mimetype=MIMETYPE_PAGE,
|
|
870
|
-
content=to_xml(result.pcgts),
|
|
871
|
-
)
|
|
872
880
|
|
|
873
881
|
def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult:
|
|
874
882
|
"""
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from dataclasses import dataclass, field
|
|
2
|
+
import copy
|
|
2
3
|
from typing import List, Union, Optional
|
|
3
4
|
from ocrd_models.ocrd_page import OcrdPage
|
|
4
5
|
from PIL.Image import Image
|
|
@@ -8,12 +9,85 @@ from ocrd_models.ocrd_page_generateds import AlternativeImageType, PageType
|
|
|
8
9
|
|
|
9
10
|
@dataclass
|
|
10
11
|
class OcrdPageResultImage():
|
|
12
|
+
"""
|
|
13
|
+
Encapsulates a single ``AlternativeImage`` reference to be persisted
|
|
14
|
+
as image file to the :py:class:`ocrd.Workspace`.
|
|
15
|
+
"""
|
|
11
16
|
pil: Image
|
|
17
|
+
"""
|
|
18
|
+
image data to be saved
|
|
19
|
+
"""
|
|
12
20
|
file_id_suffix: str
|
|
21
|
+
"""
|
|
22
|
+
a suffix to append to the file name when saving
|
|
23
|
+
(something like ``.IMG`` according to OCR-D
|
|
24
|
+
conventions for PAGE-XML)
|
|
25
|
+
"""
|
|
13
26
|
alternative_image: Optional[Union[AlternativeImageType, PageType]]
|
|
27
|
+
"""
|
|
28
|
+
the ``AlternativeImage`` instance that references this image;
|
|
29
|
+
to be amended with the actual (final) ``@filename`` when saving
|
|
30
|
+
|
|
31
|
+
alternatively, can be a ``Page`` instance: in that case,
|
|
32
|
+
amend its ``@imageFilename`` (i.e. replace the original image
|
|
33
|
+
of the PAGE-XML)
|
|
34
|
+
"""
|
|
14
35
|
|
|
15
36
|
|
|
16
37
|
@dataclass
|
|
17
38
|
class OcrdPageResult():
|
|
39
|
+
"""
|
|
40
|
+
Encapsulates the return type of :py:func:`ocrd.Processor.process_page_pcgts`,
|
|
41
|
+
i.e. an instance of :py:class:`ocrd_models.ocrd_page.OcrdPage` and an
|
|
42
|
+
accompanying list of :py:class:`OcrdPageResultImage` that contain all
|
|
43
|
+
image files referenced via ``AlternativeImage`` to be persisted into the
|
|
44
|
+
:py:class:`ocrd.Workspace` along with the PAGE-XML itself.
|
|
45
|
+
"""
|
|
18
46
|
pcgts: OcrdPage
|
|
19
47
|
images: List[OcrdPageResultImage] = field(default_factory=list)
|
|
48
|
+
|
|
49
|
+
class OcrdPageResultVariadicListWrapper():
|
|
50
|
+
"""
|
|
51
|
+
Proxy object for :py:class:`ocrd.SingleOcrdPageResult` allowing
|
|
52
|
+
list semantics (i.e. multi-valued return from
|
|
53
|
+
:py:func:`ocrd.Processor.process_page_pcgts`) without changing
|
|
54
|
+
the API introduced in version 3.0.
|
|
55
|
+
|
|
56
|
+
Everything but list access will yield the old (singular valued)
|
|
57
|
+
semantics.
|
|
58
|
+
"""
|
|
59
|
+
def __init__(
|
|
60
|
+
self,
|
|
61
|
+
pcgts: OcrdPage,
|
|
62
|
+
*args):
|
|
63
|
+
self._results = [SingleOcrdPageResult(pcgts)] + [
|
|
64
|
+
SingleOcrdPageResult(arg) for arg in args]
|
|
65
|
+
|
|
66
|
+
def __getitem__(self, key):
|
|
67
|
+
return self._results[key]
|
|
68
|
+
|
|
69
|
+
def __contains__(self, key):
|
|
70
|
+
return key in self._results
|
|
71
|
+
|
|
72
|
+
def __len__(self):
|
|
73
|
+
return len(self._results)
|
|
74
|
+
|
|
75
|
+
def __iter__(self):
|
|
76
|
+
return iter(self._results)
|
|
77
|
+
|
|
78
|
+
def __repr__(self):
|
|
79
|
+
return repr(self._results)
|
|
80
|
+
|
|
81
|
+
# allow copy() without infinite recursion
|
|
82
|
+
def __copy__(self):
|
|
83
|
+
return OcrdPageResultVariadicListWrapper(*copy.copy(self._results))
|
|
84
|
+
|
|
85
|
+
# allow deepcopy() without infinite recursion
|
|
86
|
+
def __deepcopy__(self, memo):
|
|
87
|
+
return OcrdPageResultVariadicListWrapper(*copy.deepcopy(self._results))
|
|
88
|
+
|
|
89
|
+
# delegate to all members of first result
|
|
90
|
+
def __getattr__(self, name):
|
|
91
|
+
return getattr(self._results[0], name)
|
|
92
|
+
|
|
93
|
+
SingleOcrdPageResult, OcrdPageResult = OcrdPageResult, OcrdPageResultVariadicListWrapper
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
ocrd/__init__.py,sha256=ZswMVmlqFhAEIzMR3my6IKPq9XLH21aDPC_m_8Jh4dA,1076
|
|
2
2
|
ocrd/constants.py,sha256=REPY-y28MMsrTWBNB4oOsvX3W06Xr2fvtv9wuWH9oAI,633
|
|
3
3
|
ocrd/mets_server.py,sha256=LbZ0U2_o0W7cWO639U7E816dXabro8-8yHGX0quvHn4,22304
|
|
4
|
-
ocrd/ocrd-all-tool.json,sha256=
|
|
4
|
+
ocrd/ocrd-all-tool.json,sha256=qVTZq8cJtKQuEgtqYQVkpYQPz4BegO09b7TT-LcfsJs,3134
|
|
5
5
|
ocrd/resolver.py,sha256=7uwHRxaK8YMdKHe_a2dfrcNwL6UhQRJRVBrIX7GST7Q,15443
|
|
6
6
|
ocrd/resource_manager.py,sha256=2wo3JSCYE1oA0VgI8H901IsC-fnx6vRJ5qSMFgYNorE,20664
|
|
7
7
|
ocrd/task_sequence.py,sha256=r4e4iaP9AXzTL2xQZpfYnHuFXty5pE-ym3gIyUz1aJc,7180
|
|
@@ -23,9 +23,9 @@ ocrd/decorators/mets_find_options.py,sha256=8fiSdk-415o6-iBPB2T9He_v52qE8cTj3cCn
|
|
|
23
23
|
ocrd/decorators/ocrd_cli_options.py,sha256=Bemkq3V3QkOI3nNqGzphaNW7gjU9vNN-M5F2DvxvioM,2479
|
|
24
24
|
ocrd/decorators/parameter_option.py,sha256=TnCIcV9L5oAnI1Ew2TyFzo5FAwiIzWl2pn8oaD9jfEU,1056
|
|
25
25
|
ocrd/processor/__init__.py,sha256=39ymNwYRdc-b_OJzzKmWCvo2ga3KdsGSYDHE1Hzkn_w,274
|
|
26
|
-
ocrd/processor/base.py,sha256=
|
|
26
|
+
ocrd/processor/base.py,sha256=yHwxd4ZkHLPuFgqQmOeDhMWAdCnHY_ptOjiSWj-FZqI,60600
|
|
27
27
|
ocrd/processor/helpers.py,sha256=4lR_QvZsxvh7f8_uK9YzdHP5-hvFU4qqYM_Cu_k41KI,10937
|
|
28
|
-
ocrd/processor/ocrd_page_result.py,sha256=
|
|
28
|
+
ocrd/processor/ocrd_page_result.py,sha256=hHV1TlKhKFN848cUCqR31v2R3HH4HEoeyGXqUc2DLkY,2945
|
|
29
29
|
ocrd/processor/builtin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
30
30
|
ocrd/processor/builtin/dummy_processor.py,sha256=SmMRtN0w88kBU24654ThT-yf84SFsFW4BOcmwsDDWdc,3533
|
|
31
31
|
ocrd/processor/builtin/filter_processor.py,sha256=9mbMq_XTJa8wrlbNdf46GUMNdjedz-enxafsCrnNhEo,4295
|
|
@@ -46,16 +46,16 @@ ocrd_models/ocrd_agent.py,sha256=Nm0XDNCmWZ8O3xsXaY-WmEghttXmh90UKmAObCL99IY,561
|
|
|
46
46
|
ocrd_models/ocrd_exif.py,sha256=HSLPn_WBDRIlMtKNYilLHm8WjX-b14HgnqT_KfzjS_0,4680
|
|
47
47
|
ocrd_models/ocrd_file.py,sha256=9-mfDb91RVy3p9rKryl-C39P4Of6Rb8OZBuxAee4VrI,9723
|
|
48
48
|
ocrd_models/ocrd_mets.py,sha256=lz9mlDq9A9UmZDoN8lh5XRnBzdAtLLZywDZSbyZPS84,50905
|
|
49
|
-
ocrd_models/ocrd_page.py,sha256=
|
|
50
|
-
ocrd_models/ocrd_page_generateds.py,sha256=
|
|
49
|
+
ocrd_models/ocrd_page.py,sha256=Hed1PJ4JWSkTVj7mVOWDaJqtZ9Fc9czzOfPr6flFohw,6818
|
|
50
|
+
ocrd_models/ocrd_page_generateds.py,sha256=hBIhOs_slXdQza_zokBfjjsrimX76h9I_6prRWbgVAk,911920
|
|
51
51
|
ocrd_models/ocrd_xml_base.py,sha256=iOnDl2zBNhN-Q4moLWiFkSqXvfRzxE5wbp5Tjsu1W6A,1642
|
|
52
52
|
ocrd_models/report.py,sha256=CX-t9ZDi2VmAy8M1Azsh83UsvE_f5pMeEC7tPaA-ztU,2021
|
|
53
53
|
ocrd_models/utils.py,sha256=A-H11ZJ65ZjH4DPK9s_Yz6JtA9fbTQ2jY-__9s7Hrg8,2320
|
|
54
54
|
ocrd_models/xpath_functions.py,sha256=VM2f9hl8ja4NrDOEQRSYdx7GewwAxfoyGMDjqjgA_7g,1439
|
|
55
55
|
ocrd_network/__init__.py,sha256=NWlSgXi7z45ow37AmITxfCB1d-L39rO8ttyxNJ-z8G0,376
|
|
56
|
-
ocrd_network/client.py,sha256=
|
|
57
|
-
ocrd_network/client_utils.py,sha256=
|
|
58
|
-
ocrd_network/constants.py,sha256=
|
|
56
|
+
ocrd_network/client.py,sha256=hi13uDUYC5t7xHtZEUYwNBAZOvovWaScfCtFSORVg7Q,3224
|
|
57
|
+
ocrd_network/client_utils.py,sha256=d5UE0MdDJxsYxIQemKcoUuALOiPJ8Cew8bjgsg9d71w,5709
|
|
58
|
+
ocrd_network/constants.py,sha256=mUjpkZDYPdRZmOeC0jyzQkuLuWrODLFzlrAHkguKWGg,1942
|
|
59
59
|
ocrd_network/database.py,sha256=-SddvaMLKn0pjdONyvWmjxfPJd6viedAIp6Lj1sU1Zs,10705
|
|
60
60
|
ocrd_network/logging_utils.py,sha256=hXwS46FzY_HTh92DgnxTuARxj8C18bOBmFKVrvBlUgc,2409
|
|
61
61
|
ocrd_network/param_validators.py,sha256=Jl1VwiPPKJ50k-xEHLdvW-1QDOkJHCiMz4k9Ipqm-Uc,1489
|
|
@@ -68,7 +68,7 @@ ocrd_network/server_utils.py,sha256=Lxby62gHvrSbHgpWXvyZGdsWajp2TFzyxjHdMZWBESk,
|
|
|
68
68
|
ocrd_network/tcp_to_uds_mets_proxy.py,sha256=yRW-O6ihd31gf7xqQBIBb_ZQQgqisMyOdRI216ehq_A,3160
|
|
69
69
|
ocrd_network/utils.py,sha256=yE-nV_sv171tPp7weIFOxYw6HJlxvGBmrS8b1rIHS7c,6760
|
|
70
70
|
ocrd_network/cli/__init__.py,sha256=VBjjXcn-2O5gerqE6UdNfS-EkVFEVPQFHylsn8F9kfY,317
|
|
71
|
-
ocrd_network/cli/client.py,sha256=
|
|
71
|
+
ocrd_network/cli/client.py,sha256=aZbUqPSQtUcCk-4zz-qNwRTGy42-KvzGk44L2_FVR4k,10357
|
|
72
72
|
ocrd_network/cli/processing_server.py,sha256=NsuI0f9h4KDwe39YugmHo5cJ_29chcLLQ7DThKfPO7s,770
|
|
73
73
|
ocrd_network/cli/processing_worker.py,sha256=ZuaCkbKV_WKJV7cGOjZ6RLrjjppymnwNCiznFMlclAg,1897
|
|
74
74
|
ocrd_network/cli/resmgr_server.py,sha256=sc0VX_RehTbg8Qp7ht_DvVqsrdL5b9Zw3bBgWcAD13A,826
|
|
@@ -81,7 +81,7 @@ ocrd_network/rabbitmq_utils/__init__.py,sha256=XLIqZhfin4I4m80G9B__UcP45Lz10_mEp
|
|
|
81
81
|
ocrd_network/rabbitmq_utils/connector.py,sha256=N6mzjIf5FkVIno3FI1AksZY4F5jMUAm8baay0nXZx8w,11343
|
|
82
82
|
ocrd_network/rabbitmq_utils/constants.py,sha256=Zu_dKJASfrgnIvEZZlFX9uDR9y6w7zy0KhW7gP7wHDE,1063
|
|
83
83
|
ocrd_network/rabbitmq_utils/consumer.py,sha256=3WeryDmo0dSD9U0eLODbDElscvhEYjNeCBIewQHYfws,2488
|
|
84
|
-
ocrd_network/rabbitmq_utils/helpers.py,sha256=
|
|
84
|
+
ocrd_network/rabbitmq_utils/helpers.py,sha256=gbP9Ks4c_ksMln-VQ7GCND6ok_lttm6wW-R7Wszo5qA,5374
|
|
85
85
|
ocrd_network/rabbitmq_utils/ocrd_messages.py,sha256=wwzfMWbXmOFo_nd32_XySCso91_Ul-aGm_GhGncNxD4,4419
|
|
86
86
|
ocrd_network/rabbitmq_utils/publisher.py,sha256=mw4XQQhRE1xUQVgEUseyG845iIgVO-9GdGwNH6nUFms,2433
|
|
87
87
|
ocrd_network/runtime_data/__init__.py,sha256=PnWuuagElbkTzGtPWQEk5wlFtDxqT7B48S0Zrgt8H68,320
|
|
@@ -123,9 +123,9 @@ ocrd_validators/xlink.xsd,sha256=8fW7YAMWXN2PbB_MMvj9H5ZeFoEBDzuYBtlGC8_6ijw,318
|
|
|
123
123
|
ocrd_validators/xsd_mets_validator.py,sha256=YgiuNtwNDtn3LuvdFFscnmsGREF_wQ4wtA76yE2Iljw,469
|
|
124
124
|
ocrd_validators/xsd_page_validator.py,sha256=ggt-nmaz-DDyAPwm3ZMVvtChuV2BJ2ZEEbWpePL9vTk,469
|
|
125
125
|
ocrd_validators/xsd_validator.py,sha256=ahJo_oVvTK_JB0Cu4CkMC8l_gbzsyW91AxGtelMjqrg,2115
|
|
126
|
-
ocrd-3.
|
|
127
|
-
ocrd-3.
|
|
128
|
-
ocrd-3.
|
|
129
|
-
ocrd-3.
|
|
130
|
-
ocrd-3.
|
|
131
|
-
ocrd-3.
|
|
126
|
+
ocrd-3.9.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
127
|
+
ocrd-3.9.0.dist-info/METADATA,sha256=sKR-ODMcThMWizUZ_1duc0bj4oruD0RPnc5z7-AJly4,11396
|
|
128
|
+
ocrd-3.9.0.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
129
|
+
ocrd-3.9.0.dist-info/entry_points.txt,sha256=CI-NoDR1BYmsuAsJmPAn4NrN9guzdedHGUbC8QSmdGs,266
|
|
130
|
+
ocrd-3.9.0.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
|
|
131
|
+
ocrd-3.9.0.dist-info/RECORD,,
|
ocrd_models/ocrd_page.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
API to PAGE-XML, generated with generateDS from XML schema.
|
|
3
3
|
"""
|
|
4
4
|
from io import StringIO
|
|
5
|
+
import copy
|
|
5
6
|
from typing import Dict, Union, Any
|
|
6
7
|
from lxml import etree as ET
|
|
7
8
|
from elementpath import XPath2Parser, XPathContext
|
|
@@ -212,6 +213,25 @@ class OcrdPage():
|
|
|
212
213
|
self.xpath_context = XPathContext(self.etree)
|
|
213
214
|
self.xpath = lambda expression: self.xpath_parser.parse(expression).get_results(self.xpath_context)
|
|
214
215
|
|
|
216
|
+
# allow copy() without infinite recursion
|
|
217
|
+
def __copy__(self):
|
|
218
|
+
return OcrdPage(
|
|
219
|
+
copy.copy(self._pcgts),
|
|
220
|
+
copy.copy(self.etree),
|
|
221
|
+
copy.copy(self.mapping),
|
|
222
|
+
copy.copy(self.revmap),
|
|
223
|
+
)
|
|
224
|
+
|
|
225
|
+
# allow deepcopy() without infinite recursion
|
|
226
|
+
def __deepcopy__(self, memo):
|
|
227
|
+
return OcrdPage(
|
|
228
|
+
copy.deepcopy(self._pcgts, memo),
|
|
229
|
+
copy.deepcopy(self.etree, memo),
|
|
230
|
+
copy.deepcopy(self.mapping, memo),
|
|
231
|
+
copy.deepcopy(self.revmap, memo),
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
# delegate to all members of ._pcgts
|
|
215
235
|
def __getattr__(self, name):
|
|
216
236
|
return getattr(self._pcgts, name)
|
|
217
237
|
|