ds-caselaw-marklogic-api-client 25.0.0__py3-none-any.whl → 27.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ds-caselaw-marklogic-api-client might be problematic. Click here for more details.
- caselawclient/Client.py +3 -10
- caselawclient/factories.py +165 -0
- caselawclient/models/{documents.py → documents/__init__.py} +33 -226
- caselawclient/models/documents/body.py +139 -0
- caselawclient/models/documents/exceptions.py +6 -0
- caselawclient/models/documents/statuses.py +12 -0
- caselawclient/models/documents/xml.py +43 -0
- caselawclient/models/judgments.py +12 -10
- caselawclient/models/neutral_citation_mixin.py +7 -11
- caselawclient/models/press_summaries.py +11 -9
- caselawclient/models/utilities/aws.py +2 -0
- caselawclient/models/utilities/dates.py +1 -5
- caselawclient/models/utilities/move.py +12 -63
- caselawclient/responses/search_response.py +2 -2
- caselawclient/responses/search_result.py +6 -4
- {ds_caselaw_marklogic_api_client-25.0.0.dist-info → ds_caselaw_marklogic_api_client-27.0.0.dist-info}/METADATA +2 -2
- {ds_caselaw_marklogic_api_client-25.0.0.dist-info → ds_caselaw_marklogic_api_client-27.0.0.dist-info}/RECORD +19 -14
- {ds_caselaw_marklogic_api_client-25.0.0.dist-info → ds_caselaw_marklogic_api_client-27.0.0.dist-info}/LICENSE.md +0 -0
- {ds_caselaw_marklogic_api_client-25.0.0.dist-info → ds_caselaw_marklogic_api_client-27.0.0.dist-info}/WHEEL +0 -0
caselawclient/Client.py
CHANGED
|
@@ -12,6 +12,7 @@ from xml.etree.ElementTree import Element, ParseError, fromstring
|
|
|
12
12
|
|
|
13
13
|
import environ
|
|
14
14
|
import requests
|
|
15
|
+
from ds_caselaw_utils.types import NeutralCitationString
|
|
15
16
|
from requests.auth import HTTPBasicAuth
|
|
16
17
|
from requests.structures import CaseInsensitiveDict
|
|
17
18
|
from requests_toolbelt.multipart import decoder
|
|
@@ -800,10 +801,7 @@ class MarklogicApiClient:
|
|
|
800
801
|
else None
|
|
801
802
|
)
|
|
802
803
|
|
|
803
|
-
|
|
804
|
-
image_location = os.getenv("XSLT_IMAGE_LOCATION")
|
|
805
|
-
else:
|
|
806
|
-
image_location = ""
|
|
804
|
+
image_location = os.getenv("XSLT_IMAGE_LOCATION", "")
|
|
807
805
|
|
|
808
806
|
show_unpublished = self.verify_show_unpublished(show_unpublished)
|
|
809
807
|
|
|
@@ -1038,12 +1036,7 @@ class MarklogicApiClient:
|
|
|
1038
1036
|
search_parameters.collections = [DOCUMENT_COLLECTION_URI_JUDGMENT]
|
|
1039
1037
|
return self.search_and_decode_response(search_parameters)
|
|
1040
1038
|
|
|
1041
|
-
def
|
|
1042
|
-
"""Move the judgment at old_uri on top of the new citation, which must already exist
|
|
1043
|
-
Compare to update_document_uri"""
|
|
1044
|
-
return move.overwrite_document(old_uri, new_citation, api_client=self)
|
|
1045
|
-
|
|
1046
|
-
def update_document_uri(self, old_uri: str, new_citation: str) -> str:
|
|
1039
|
+
def update_document_uri(self, old_uri: DocumentURIString, new_citation: NeutralCitationString) -> DocumentURIString:
|
|
1047
1040
|
"""
|
|
1048
1041
|
Move the document at old_uri to the correct location based on the neutral citation
|
|
1049
1042
|
The new neutral citation *must* not already exist (that is handled elsewhere)
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from typing import Any, Optional, cast
|
|
3
|
+
from unittest.mock import Mock, patch
|
|
4
|
+
|
|
5
|
+
from typing_extensions import TypeAlias
|
|
6
|
+
|
|
7
|
+
from caselawclient.Client import MarklogicApiClient
|
|
8
|
+
from caselawclient.models.documents import Document
|
|
9
|
+
from caselawclient.models.documents.body import DocumentBody
|
|
10
|
+
from caselawclient.models.judgments import Judgment
|
|
11
|
+
from caselawclient.models.press_summaries import PressSummary
|
|
12
|
+
from caselawclient.responses.search_result import SearchResult, SearchResultMetadata
|
|
13
|
+
|
|
14
|
+
DEFAULT_DOCUMENT_BODY_XML = "<akomantoso>This is some XML of a judgment.</akomantoso>"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class DocumentBodyFactory:
    """Build `DocumentBody` instances from an XML string plus per-attribute overrides."""

    # Attribute name on the built body -> value used when the caller supplies no override.
    PARAMS_MAP: dict[str, Any] = {
        "name": "Judgment v Judgement",
        "court": "Court of Testing",
        "document_date_as_string": "2023-02-03",
    }

    @classmethod
    def build(cls, xml_string: str = DEFAULT_DOCUMENT_BODY_XML, **kwargs: Any) -> DocumentBody:
        """
        Create a `DocumentBody` from `xml_string` and assign every `PARAMS_MAP` attribute on it.

        :param xml_string: XML source; it is UTF-8 encoded before being passed to `DocumentBody`
        :param kwargs: Values for attributes named in `PARAMS_MAP`; any other keys are ignored

        :return: The populated `DocumentBody`
        """
        body = DocumentBody(xml_bytestring=xml_string.encode(encoding="utf-8"))

        for attribute, fallback in cls.PARAMS_MAP.items():
            chosen = kwargs[attribute] if attribute in kwargs else fallback
            setattr(body, attribute, chosen)

        return body
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class DocumentFactory:
    """Build instances of `target_class` (`Document` here) backed by a mocked API client."""

    # "name_of_attribute": "default value"
    PARAMS_MAP: dict[str, Any] = {
        "is_published": False,
        "is_sensitive": False,
        "is_anonymised": False,
        "is_failure": False,
        "source_name": "Example Uploader",
        "source_email": "uploader@example.com",
        "consignment_reference": "TDR-12345",
        "assigned_to": "",
        "versions": [],
    }

    target_class: TypeAlias = Document

    @classmethod
    def build(
        cls,
        uri: str = "test/2023/123",
        html: str = "<p>This is a judgment.</p>",
        api_client: Optional[MarklogicApiClient] = None,
        **kwargs: Any,
    ) -> target_class:
        """
        Construct a `target_class` instance and assign every `PARAMS_MAP` attribute on it.

        :param uri: URI passed to the `target_class` constructor
        :param html: Value returned by the built document's `content_as_html`
        :param api_client: Client passed to the constructor; when falsy, a `Mock` specced on
            `MarklogicApiClient` is used, returning the default XML from `get_judgment_xml_bytestring`
        :param kwargs: `body` replaces the default `DocumentBodyFactory.build()` body; keys named in
            `PARAMS_MAP` override that attribute's default; any other keys are ignored

        :return: The populated document
        """
        if not api_client:
            api_client = Mock(spec=MarklogicApiClient)
            api_client.get_judgment_xml_bytestring.return_value = DEFAULT_DOCUMENT_BODY_XML.encode(encoding="utf-8")

        with patch.object(cls.target_class, "content_as_html") as mock_content_as_html:
            mock_content_as_html.return_value = html
            document = cls.target_class(uri, api_client=api_client)

        # The class-level patch above is undone when the `with` block exits, so stub the method
        # on the instance as well; otherwise `html` has no effect on the returned document.
        document.content_as_html = Mock(return_value=html)  # type: ignore[method-assign]

        document.body = kwargs.pop("body") if "body" in kwargs else DocumentBodyFactory.build()

        for param_name, default_value in cls.PARAMS_MAP.items():
            if param_name in kwargs:
                value = kwargs[param_name]
            elif isinstance(default_value, (list, dict, set)):
                # Copy mutable defaults (e.g. "versions") so built documents never share one object.
                value = default_value.copy()
            else:
                value = default_value
            setattr(document, param_name, value)

        return document
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class JudgmentFactory(DocumentFactory):
    """`DocumentFactory` specialisation whose built documents are `Judgment` instances."""

    target_class = Judgment

    def __init__(self) -> None:
        """Give this factory instance a `PARAMS_MAP` extended with neutral citation defaults."""
        # NOTE(review): `build` is a classmethod and the inherited loop reads `cls.PARAMS_MAP`, so
        # these instance-level defaults do not appear to reach `build()` — confirm intent.
        citation_defaults = {
            "neutral_citation": "[2023] Test 123",
            "best_human_identifier": "[2023] Test 123",
        }
        self.PARAMS_MAP = {**self.PARAMS_MAP, **citation_defaults}

        super().__init__()

    @classmethod
    def build(
        cls,
        uri: str = "test/2023/123",
        html: str = "<p>This is a judgment.</p>",
        api_client: Optional[MarklogicApiClient] = None,
        **kwargs: Any,
    ) -> Judgment:
        """
        Delegate to `DocumentFactory.build` and narrow the static return type to `Judgment`.

        :return: The document built by the parent factory
        """
        judgment = super().build(uri, html, api_client, **kwargs)
        return cast(Judgment, judgment)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class PressSummaryFactory(DocumentFactory):
    """`DocumentFactory` specialisation whose built documents are `PressSummary` instances."""

    target_class = PressSummary

    def __init__(self) -> None:
        """Give this factory instance a `PARAMS_MAP` extended with neutral citation defaults."""
        # NOTE(review): `build` is a classmethod and the inherited loop reads `cls.PARAMS_MAP`, so
        # these instance-level defaults do not appear to reach `build()` — confirm intent.
        citation_defaults = {
            "neutral_citation": "[2023] Test 123",
            "best_human_identifier": "[2023] Test 123",
        }
        self.PARAMS_MAP = {**self.PARAMS_MAP, **citation_defaults}

        super().__init__()

    @classmethod
    def build(
        cls,
        uri: str = "test/2023/123/press-summary/1",
        html: str = "<p>This is a judgment.</p>",
        api_client: Optional[MarklogicApiClient] = None,
        **kwargs: Any,
    ) -> PressSummary:
        """
        Delegate to `DocumentFactory.build` and narrow the static return type to `PressSummary`.

        :return: The document built by the parent factory
        """
        press_summary = super().build(uri, html, api_client, **kwargs)
        return cast(PressSummary, press_summary)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class SimpleFactory:
|
|
124
|
+
# "name_of_attribute": "default value"
|
|
125
|
+
PARAMS_MAP: dict[str, Any]
|
|
126
|
+
|
|
127
|
+
target_class: TypeAlias = object
|
|
128
|
+
|
|
129
|
+
@classmethod
|
|
130
|
+
def build(cls, **kwargs: Any) -> target_class:
|
|
131
|
+
mock_object = Mock(spec=cls.target_class, autospec=True)
|
|
132
|
+
|
|
133
|
+
for param, default in cls.PARAMS_MAP.items():
|
|
134
|
+
if param in kwargs:
|
|
135
|
+
setattr(mock_object.return_value, param, kwargs[param])
|
|
136
|
+
else:
|
|
137
|
+
setattr(mock_object.return_value, param, default)
|
|
138
|
+
|
|
139
|
+
return mock_object()
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class SearchResultMetadataFactory(SimpleFactory):
    """`SimpleFactory` producing mocks specced on `SearchResultMetadata`."""

    target_class = SearchResultMetadata

    # Attribute name on the built mock -> value used when `build` receives no override.
    PARAMS_MAP = {
        "author": "Fake Name",
        "author_email": "fake.email@gov.invalid",
        "consignment_reference": "TDR-2023-ABC",
        "submission_datetime": datetime.datetime(2023, 2, 3, 9, 12, 34),
    }
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class SearchResultFactory(SimpleFactory):
    # `SimpleFactory` producing mocks specced on `SearchResult`.
    target_class = SearchResult

    # "name_of_attribute": "default value"
    # NOTE(review): the "metadata" default is built once, when this class body is executed, so
    # every result built without a `metadata` override receives that same object.
    PARAMS_MAP = {
        "uri": "test/2023/123",
        "name": "Judgment v Judgement",
        "neutral_citation": "[2023] Test 123",
        "court": "Court of Testing",
        "date": datetime.date(2023, 2, 3),
        "metadata": SearchResultMetadataFactory.build(),
        "is_failure": False,
    }
|
|
@@ -1,27 +1,22 @@
|
|
|
1
1
|
import datetime
|
|
2
2
|
import warnings
|
|
3
3
|
from functools import cached_property
|
|
4
|
-
from typing import TYPE_CHECKING, Any,
|
|
4
|
+
from typing import TYPE_CHECKING, Any, NewType, Optional
|
|
5
5
|
|
|
6
|
-
import pytz
|
|
7
6
|
from ds_caselaw_utils import courts
|
|
8
7
|
from ds_caselaw_utils.courts import CourtNotFoundException
|
|
9
|
-
from
|
|
8
|
+
from ds_caselaw_utils.types import NeutralCitationString
|
|
10
9
|
from lxml import html as html_parser
|
|
11
10
|
from requests_toolbelt.multipart import decoder
|
|
12
11
|
|
|
13
|
-
from caselawclient.
|
|
14
|
-
from caselawclient.models.utilities.dates import parse_string_date_as_utc
|
|
15
|
-
|
|
16
|
-
from ..errors import (
|
|
12
|
+
from caselawclient.errors import (
|
|
17
13
|
DocumentNotFoundError,
|
|
18
14
|
GatewayTimeoutError,
|
|
19
15
|
NotSupportedOnVersion,
|
|
20
16
|
OnlySupportedOnVersion,
|
|
21
17
|
)
|
|
22
|
-
from
|
|
23
|
-
from .utilities import
|
|
24
|
-
from .utilities.aws import (
|
|
18
|
+
from caselawclient.models.utilities import VersionsDict, extract_version, render_versions
|
|
19
|
+
from caselawclient.models.utilities.aws import (
|
|
25
20
|
ParserInstructionsDict,
|
|
26
21
|
announce_document_event,
|
|
27
22
|
check_docx_exists,
|
|
@@ -34,31 +29,17 @@ from .utilities.aws import (
|
|
|
34
29
|
uri_for_s3,
|
|
35
30
|
)
|
|
36
31
|
|
|
37
|
-
|
|
38
|
-
|
|
32
|
+
from .body import DocumentBody
|
|
33
|
+
from .exceptions import CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion
|
|
34
|
+
from .statuses import DOCUMENT_STATUS_HOLD, DOCUMENT_STATUS_IN_PROGRESS, DOCUMENT_STATUS_NEW, DOCUMENT_STATUS_PUBLISHED
|
|
39
35
|
|
|
40
|
-
|
|
41
|
-
pass
|
|
36
|
+
MINIMUM_ENRICHMENT_TIME = datetime.timedelta(minutes=20)
|
|
42
37
|
|
|
43
38
|
|
|
44
39
|
class GatewayTimeoutGettingHTMLWithQuery(RuntimeWarning):
|
|
45
40
|
pass
|
|
46
41
|
|
|
47
42
|
|
|
48
|
-
DOCUMENT_STATUS_HOLD = "On hold"
|
|
49
|
-
""" This document has been placed on hold to actively prevent publication. """
|
|
50
|
-
|
|
51
|
-
DOCUMENT_STATUS_PUBLISHED = "Published"
|
|
52
|
-
""" This document has been published and should be considered publicly visible. """
|
|
53
|
-
|
|
54
|
-
DOCUMENT_STATUS_IN_PROGRESS = "In progress"
|
|
55
|
-
""" This document has not been published or put on hold, and has been picked up by an editor and
|
|
56
|
-
should be progressing through the document pipeline. """
|
|
57
|
-
|
|
58
|
-
DOCUMENT_STATUS_NEW = "New"
|
|
59
|
-
""" This document isn't published, on hold, or assigned, and can be picked up by an editor in the future. """
|
|
60
|
-
|
|
61
|
-
|
|
62
43
|
DOCUMENT_COLLECTION_URI_JUDGMENT = "judgment"
|
|
63
44
|
DOCUMENT_COLLECTION_URI_PRESS_SUMMARY = "press-summary"
|
|
64
45
|
|
|
@@ -67,19 +48,6 @@ if TYPE_CHECKING:
|
|
|
67
48
|
|
|
68
49
|
|
|
69
50
|
DocumentURIString = NewType("DocumentURIString", str)
|
|
70
|
-
CourtIdentifierString = NewType("CourtIdentifierString", str)
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
class CannotPublishUnpublishableDocument(Exception):
|
|
74
|
-
"""A document which has failed publication safety checks in `Document.is_publishable` cannot be published."""
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
class DocumentNotSafeForDeletion(Exception):
|
|
78
|
-
"""A document which is not safe for deletion cannot be deleted."""
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
class NonXMLDocumentError(Exception):
|
|
82
|
-
"""A document cannot be parsed as XML."""
|
|
83
51
|
|
|
84
52
|
|
|
85
53
|
class Document:
|
|
@@ -96,7 +64,7 @@ class Document:
|
|
|
96
64
|
|
|
97
65
|
attributes_to_validate: list[tuple[str, bool, str]] = [
|
|
98
66
|
(
|
|
99
|
-
"
|
|
67
|
+
"is_failure",
|
|
100
68
|
False,
|
|
101
69
|
"This document failed to parse",
|
|
102
70
|
),
|
|
@@ -143,20 +111,18 @@ class Document:
|
|
|
143
111
|
|
|
144
112
|
:raises DocumentNotFoundError: The document does not exist within MarkLogic
|
|
145
113
|
"""
|
|
146
|
-
self.uri = DocumentURIString(uri.strip("/"))
|
|
147
|
-
self.api_client = api_client
|
|
114
|
+
self.uri: DocumentURIString = DocumentURIString(uri.strip("/"))
|
|
115
|
+
self.api_client: MarklogicApiClient = api_client
|
|
148
116
|
if not self.document_exists():
|
|
149
117
|
raise DocumentNotFoundError(f"Document {self.uri} does not exist")
|
|
150
118
|
|
|
151
|
-
self.
|
|
152
|
-
xml_bytestring=self.api_client.get_judgment_xml_bytestring(
|
|
153
|
-
self.uri,
|
|
154
|
-
show_unpublished=True,
|
|
155
|
-
),
|
|
119
|
+
self.body: DocumentBody = DocumentBody(
|
|
120
|
+
xml_bytestring=self.api_client.get_judgment_xml_bytestring(self.uri, show_unpublished=True),
|
|
156
121
|
)
|
|
122
|
+
""" `Document.body` represents the XML of the document itself, without any information such as version tracking or properties. """
|
|
157
123
|
|
|
158
124
|
def __repr__(self) -> str:
|
|
159
|
-
name = self.name or "un-named"
|
|
125
|
+
name = self.body.name or "un-named"
|
|
160
126
|
return f"<{self.document_noun} {self.uri}: {name}>"
|
|
161
127
|
|
|
162
128
|
def document_exists(self) -> bool:
|
|
@@ -186,104 +152,6 @@ class Document:
|
|
|
186
152
|
"""
|
|
187
153
|
return f"https://caselaw.nationalarchives.gov.uk/{self.uri}"
|
|
188
154
|
|
|
189
|
-
@cached_property
|
|
190
|
-
def name(self) -> str:
|
|
191
|
-
return self.xml.get_xpath_match_string(
|
|
192
|
-
"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value",
|
|
193
|
-
{"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
|
|
194
|
-
)
|
|
195
|
-
|
|
196
|
-
@cached_property
|
|
197
|
-
def court(self) -> str:
|
|
198
|
-
return self.xml.get_xpath_match_string(
|
|
199
|
-
"/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()",
|
|
200
|
-
{
|
|
201
|
-
"uk": "https://caselaw.nationalarchives.gov.uk/akn",
|
|
202
|
-
"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
|
|
203
|
-
},
|
|
204
|
-
)
|
|
205
|
-
|
|
206
|
-
@cached_property
|
|
207
|
-
def jurisdiction(self) -> str:
|
|
208
|
-
return self.xml.get_xpath_match_string(
|
|
209
|
-
"/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()",
|
|
210
|
-
{
|
|
211
|
-
"uk": "https://caselaw.nationalarchives.gov.uk/akn",
|
|
212
|
-
"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
|
|
213
|
-
},
|
|
214
|
-
)
|
|
215
|
-
|
|
216
|
-
@property
|
|
217
|
-
def court_and_jurisdiction_identifier_string(self) -> CourtIdentifierString:
|
|
218
|
-
if self.jurisdiction != "":
|
|
219
|
-
return CourtIdentifierString("/".join((self.court, self.jurisdiction)))
|
|
220
|
-
return CourtIdentifierString(self.court)
|
|
221
|
-
|
|
222
|
-
@cached_property
|
|
223
|
-
def document_date_as_string(self) -> str:
|
|
224
|
-
return self.xml.get_xpath_match_string(
|
|
225
|
-
"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRdate/@date",
|
|
226
|
-
{"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
|
|
227
|
-
)
|
|
228
|
-
|
|
229
|
-
@cached_property
|
|
230
|
-
def document_date_as_date(self) -> Optional[datetime.date]:
|
|
231
|
-
if not self.document_date_as_string:
|
|
232
|
-
return None
|
|
233
|
-
try:
|
|
234
|
-
return datetime.datetime.strptime(
|
|
235
|
-
self.document_date_as_string,
|
|
236
|
-
"%Y-%m-%d",
|
|
237
|
-
).date()
|
|
238
|
-
except ValueError:
|
|
239
|
-
warnings.warn(
|
|
240
|
-
f"Unparsable date encountered: {self.document_date_as_string}",
|
|
241
|
-
UnparsableDate,
|
|
242
|
-
)
|
|
243
|
-
return None
|
|
244
|
-
|
|
245
|
-
def get_manifestation_datetimes(
|
|
246
|
-
self,
|
|
247
|
-
name: Optional[str] = None,
|
|
248
|
-
) -> list[datetime.datetime]:
|
|
249
|
-
name_filter = f"[@name='{name}']" if name else ""
|
|
250
|
-
iso_datetimes = self.xml.get_xpath_match_strings(
|
|
251
|
-
"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRManifestation"
|
|
252
|
-
f"/akn:FRBRdate{name_filter}/@date",
|
|
253
|
-
{"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
|
|
254
|
-
)
|
|
255
|
-
|
|
256
|
-
return [parse_string_date_as_utc(event, pytz.UTC) for event in iso_datetimes]
|
|
257
|
-
|
|
258
|
-
def get_latest_manifestation_datetime(
|
|
259
|
-
self,
|
|
260
|
-
name: Optional[str] = None,
|
|
261
|
-
) -> Optional[datetime.datetime]:
|
|
262
|
-
events = self.get_manifestation_datetimes(name)
|
|
263
|
-
if not events:
|
|
264
|
-
return None
|
|
265
|
-
return max(events)
|
|
266
|
-
|
|
267
|
-
def get_latest_manifestation_type(self) -> Optional[str]:
|
|
268
|
-
return max(
|
|
269
|
-
(
|
|
270
|
-
(type, time)
|
|
271
|
-
for type in ["transform", "tna-enriched"]
|
|
272
|
-
if (time := self.get_latest_manifestation_datetime(type))
|
|
273
|
-
),
|
|
274
|
-
key=lambda x: x[1],
|
|
275
|
-
)[0]
|
|
276
|
-
|
|
277
|
-
@cached_property
|
|
278
|
-
def transformation_datetime(self) -> Optional[datetime.datetime]:
|
|
279
|
-
"""When was this document successfully parsed or reparsed (date from XML)"""
|
|
280
|
-
return self.get_latest_manifestation_datetime("transform")
|
|
281
|
-
|
|
282
|
-
@cached_property
|
|
283
|
-
def enrichment_datetime(self) -> Optional[datetime.datetime]:
|
|
284
|
-
"""When was this document successfully enriched (date from XML)"""
|
|
285
|
-
return self.get_latest_manifestation_datetime("tna-enriched")
|
|
286
|
-
|
|
287
155
|
@cached_property
|
|
288
156
|
def is_published(self) -> bool:
|
|
289
157
|
return self.api_client.get_published(self.uri)
|
|
@@ -372,10 +240,6 @@ class Document:
|
|
|
372
240
|
"Is this document a potentially historic version of a document, or is it the main document itself?"
|
|
373
241
|
return extract_version(self.uri) != 0
|
|
374
242
|
|
|
375
|
-
@cached_property
|
|
376
|
-
def content_as_xml(self) -> str:
|
|
377
|
-
return self.xml.xml_as_string
|
|
378
|
-
|
|
379
243
|
def content_as_html(
|
|
380
244
|
self,
|
|
381
245
|
version_uri: Optional[DocumentURIString] = None,
|
|
@@ -418,39 +282,21 @@ class Document:
|
|
|
418
282
|
|
|
419
283
|
:return: `True` if this document is in a 'failure' state, otherwise `False`
|
|
420
284
|
"""
|
|
421
|
-
|
|
422
|
-
return True
|
|
423
|
-
return False
|
|
285
|
+
return self.body.failed_to_parse
|
|
424
286
|
|
|
425
287
|
@cached_property
|
|
426
288
|
def is_parked(self) -> bool:
|
|
427
|
-
|
|
428
|
-
return True
|
|
429
|
-
return False
|
|
430
|
-
|
|
431
|
-
@cached_property
|
|
432
|
-
def failed_to_parse(self) -> bool:
|
|
433
|
-
"""
|
|
434
|
-
Did this document entirely fail to parse?
|
|
435
|
-
|
|
436
|
-
:return: `True` if there was a complete parser failure, otherwise `False`
|
|
437
|
-
"""
|
|
438
|
-
if "error" in self.xml.root_element:
|
|
439
|
-
return True
|
|
440
|
-
return False
|
|
289
|
+
return "parked" in self.uri
|
|
441
290
|
|
|
442
291
|
@cached_property
|
|
443
292
|
def has_name(self) -> bool:
|
|
444
|
-
|
|
445
|
-
return False
|
|
446
|
-
|
|
447
|
-
return True
|
|
293
|
+
return bool(self.body.name)
|
|
448
294
|
|
|
449
295
|
@cached_property
|
|
450
296
|
def has_valid_court(self) -> bool:
|
|
451
297
|
try:
|
|
452
298
|
return bool(
|
|
453
|
-
courts.get_by_code(self.court_and_jurisdiction_identifier_string),
|
|
299
|
+
courts.get_by_code(self.body.court_and_jurisdiction_identifier_string),
|
|
454
300
|
)
|
|
455
301
|
except CourtNotFoundException:
|
|
456
302
|
return False
|
|
@@ -521,9 +367,7 @@ class Document:
|
|
|
521
367
|
"""
|
|
522
368
|
Is it sensible to enrich this document?
|
|
523
369
|
"""
|
|
524
|
-
|
|
525
|
-
return True
|
|
526
|
-
return False
|
|
370
|
+
return (self.enriched_recently is False) and self.validates_against_schema
|
|
527
371
|
|
|
528
372
|
@cached_property
|
|
529
373
|
def enriched_recently(self) -> bool:
|
|
@@ -531,14 +375,13 @@ class Document:
|
|
|
531
375
|
Has this document been enriched recently?
|
|
532
376
|
"""
|
|
533
377
|
|
|
534
|
-
last_enrichment = self.enrichment_datetime
|
|
378
|
+
last_enrichment = self.body.enrichment_datetime
|
|
535
379
|
if not last_enrichment:
|
|
536
380
|
return False
|
|
537
381
|
|
|
538
382
|
now = datetime.datetime.now(tz=datetime.timezone.utc)
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
return False
|
|
383
|
+
|
|
384
|
+
return now - last_enrichment < MINIMUM_ENRICHMENT_TIME
|
|
542
385
|
|
|
543
386
|
@cached_property
|
|
544
387
|
def validates_against_schema(self) -> bool:
|
|
@@ -599,10 +442,7 @@ class Document:
|
|
|
599
442
|
else:
|
|
600
443
|
raise DocumentNotSafeForDeletion
|
|
601
444
|
|
|
602
|
-
def
|
|
603
|
-
self.api_client.overwrite_document(self.uri, new_citation)
|
|
604
|
-
|
|
605
|
-
def move(self, new_citation: str) -> None:
|
|
445
|
+
def move(self, new_citation: NeutralCitationString) -> None:
|
|
606
446
|
self.api_client.update_document_uri(self.uri, new_citation)
|
|
607
447
|
|
|
608
448
|
def force_reparse(self) -> None:
|
|
@@ -612,7 +452,11 @@ class Document:
|
|
|
612
452
|
self.api_client.set_property(self.uri, "last_sent_to_parser", now.isoformat())
|
|
613
453
|
|
|
614
454
|
parser_type_noun = {"judgment": "judgment", "press summary": "pressSummary"}[self.document_noun]
|
|
615
|
-
checked_date =
|
|
455
|
+
checked_date: Optional[str] = (
|
|
456
|
+
self.body.document_date_as_date.isoformat()
|
|
457
|
+
if self.body.document_date_as_date and self.body.document_date_as_date > datetime.date(1001, 1, 1)
|
|
458
|
+
else None
|
|
459
|
+
)
|
|
616
460
|
|
|
617
461
|
# the keys of parser_instructions should exactly match the parser output
|
|
618
462
|
# in the *-metadata.json files by the parser. Whilst typically empty
|
|
@@ -621,9 +465,9 @@ class Document:
|
|
|
621
465
|
parser_instructions: ParserInstructionsDict = {
|
|
622
466
|
"documentType": parser_type_noun,
|
|
623
467
|
"metadata": {
|
|
624
|
-
"name": self.name or None,
|
|
468
|
+
"name": self.body.name or None,
|
|
625
469
|
"cite": self.best_human_identifier or None,
|
|
626
|
-
"court": self.court or None,
|
|
470
|
+
"court": self.body.court or None,
|
|
627
471
|
"date": checked_date,
|
|
628
472
|
"uri": self.uri,
|
|
629
473
|
},
|
|
@@ -650,41 +494,4 @@ class Document:
|
|
|
650
494
|
"""
|
|
651
495
|
Is it sensible to reparse this document?
|
|
652
496
|
"""
|
|
653
|
-
|
|
654
|
-
return True
|
|
655
|
-
return False
|
|
656
|
-
|
|
657
|
-
class XML:
|
|
658
|
-
"""
|
|
659
|
-
Represents the XML of a document, and should contain all methods for interacting with it.
|
|
660
|
-
"""
|
|
661
|
-
|
|
662
|
-
def __init__(self, xml_bytestring: bytes):
|
|
663
|
-
"""
|
|
664
|
-
:raises NonXMLDocumentError: This document is not valid XML
|
|
665
|
-
"""
|
|
666
|
-
try:
|
|
667
|
-
self.xml_as_tree: etree.Element = etree.fromstring(xml_bytestring)
|
|
668
|
-
except etree.XMLSyntaxError:
|
|
669
|
-
raise NonXMLDocumentError
|
|
670
|
-
|
|
671
|
-
@property
|
|
672
|
-
def xml_as_string(self) -> str:
|
|
673
|
-
"""
|
|
674
|
-
:return: A string representation of this document's XML tree.
|
|
675
|
-
"""
|
|
676
|
-
return str(etree.tostring(self.xml_as_tree).decode(encoding="utf-8"))
|
|
677
|
-
|
|
678
|
-
@property
|
|
679
|
-
def root_element(self) -> str:
|
|
680
|
-
return str(self.xml_as_tree.tag)
|
|
681
|
-
|
|
682
|
-
def get_xpath_match_string(self, xpath: str, namespaces: Dict[str, str]) -> str:
|
|
683
|
-
return get_xpath_match_string(self.xml_as_tree, xpath, namespaces)
|
|
684
|
-
|
|
685
|
-
def get_xpath_match_strings(
|
|
686
|
-
self,
|
|
687
|
-
xpath: str,
|
|
688
|
-
namespaces: Dict[str, str],
|
|
689
|
-
) -> list[str]:
|
|
690
|
-
return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
|
|
497
|
+
return self.docx_exists()
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import warnings
|
|
3
|
+
from functools import cached_property
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
import pytz
|
|
7
|
+
from ds_caselaw_utils.types import CourtCode
|
|
8
|
+
|
|
9
|
+
from caselawclient.models.utilities.dates import parse_string_date_as_utc
|
|
10
|
+
|
|
11
|
+
from .xml import XML
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class UnparsableDate(Warning):
    """Warning category used when a document date string cannot be parsed as `%Y-%m-%d`."""
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class DocumentBody:
    """
    A class for abstracting out interactions with the body of a document.

    Values are read from the document XML using XPath. Properties declared with `cached_property`
    are computed on first access and then stored on the instance.
    """

    def __init__(self, xml_bytestring: bytes):
        """
        :param xml_bytestring: The document XML as bytes; it is passed to `XML` for parsing
        """
        self._xml = XML(xml_bytestring=xml_bytestring)
        """ This is an instance of the `XML` class for manipulation of the XML document itself. """

    def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str]) -> str:
        """
        Run `xpath` against this body's XML by delegating to `XML.get_xpath_match_string`.

        :param xpath: The XPath expression to evaluate
        :param namespaces: Mapping of namespace prefix to namespace URI used by `xpath`

        :return: The string produced by the underlying helper
        """
        return self._xml.get_xpath_match_string(xpath, namespaces)

    @cached_property
    def name(self) -> str:
        """The `@value` of the `FRBRWork/FRBRname` element in the document metadata."""
        return self._xml.get_xpath_match_string(
            "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value",
            {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
        )

    @cached_property
    def court(self) -> str:
        """The text of the `uk:court` element in the proprietary metadata."""
        return self._xml.get_xpath_match_string(
            "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()",
            {
                "uk": "https://caselaw.nationalarchives.gov.uk/akn",
                "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
            },
        )

    @cached_property
    def jurisdiction(self) -> str:
        """The text of the `uk:jurisdiction` element in the proprietary metadata."""
        return self._xml.get_xpath_match_string(
            "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()",
            {
                "uk": "https://caselaw.nationalarchives.gov.uk/akn",
                "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
            },
        )

    @property
    def court_and_jurisdiction_identifier_string(self) -> CourtCode:
        """
        :return: `court/jurisdiction` when the jurisdiction is not an empty string, otherwise
            the court alone
        """
        if self.jurisdiction != "":
            return CourtCode("/".join((self.court, self.jurisdiction)))
        return CourtCode(self.court)

    @cached_property
    def document_date_as_string(self) -> str:
        """The `@date` of the `FRBRWork/FRBRdate` element, exactly as it appears in the XML."""
        return self._xml.get_xpath_match_string(
            "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRdate/@date",
            {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
        )

    @cached_property
    def document_date_as_date(self) -> Optional[datetime.date]:
        """
        `document_date_as_string` parsed with the format `%Y-%m-%d`.

        :return: The parsed date, or `None` when the string is empty or does not match the format;
            in the latter case an `UnparsableDate` warning is issued
        """
        if not self.document_date_as_string:
            return None
        try:
            return datetime.datetime.strptime(
                self.document_date_as_string,
                "%Y-%m-%d",
            ).date()
        except ValueError:
            warnings.warn(
                f"Unparsable date encountered: {self.document_date_as_string}",
                UnparsableDate,
            )
            return None

    def get_manifestation_datetimes(
        self,
        name: Optional[str] = None,
    ) -> list[datetime.datetime]:
        """
        Collect the `@date` values of the `FRBRManifestation/FRBRdate` elements.

        :param name: When given, only `FRBRdate` elements whose `@name` equals this value are used

        :return: One datetime per matching element, each converted by `parse_string_date_as_utc`
        """
        # `name` is interpolated into the XPath as-is, with no quoting or escaping applied here.
        name_filter = f"[@name='{name}']" if name else ""
        iso_datetimes = self._xml.get_xpath_match_strings(
            "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRManifestation"
            f"/akn:FRBRdate{name_filter}/@date",
            {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
        )

        return [parse_string_date_as_utc(event, pytz.UTC) for event in iso_datetimes]

    def get_latest_manifestation_datetime(
        self,
        name: Optional[str] = None,
    ) -> Optional[datetime.datetime]:
        """
        :param name: Optional `FRBRdate/@name` filter, as for `get_manifestation_datetimes`

        :return: The most recent matching datetime, or `None` when there are no matches
        """
        events = self.get_manifestation_datetimes(name)
        if not events:
            return None
        return max(events)

    def get_latest_manifestation_type(self) -> Optional[str]:
        """
        Which of the `transform` and `tna-enriched` manifestations carries the most recent date?

        :return: `"transform"` or `"tna-enriched"`, or `None` when the document has neither
        """
        dated_manifestations = (
            (manifestation_type, time)
            for manifestation_type in ["transform", "tna-enriched"]
            if (time := self.get_latest_manifestation_datetime(manifestation_type))
        )
        # `default` stops `max` raising ValueError when neither manifestation type is present.
        latest = max(dated_manifestations, key=lambda x: x[1], default=None)
        if latest is None:
            return None
        return latest[0]

    @cached_property
    def transformation_datetime(self) -> Optional[datetime.datetime]:
        """When was this document successfully parsed or reparsed (date from XML)"""
        return self.get_latest_manifestation_datetime("transform")

    @cached_property
    def enrichment_datetime(self) -> Optional[datetime.datetime]:
        """When was this document successfully enriched (date from XML)"""
        return self.get_latest_manifestation_datetime("tna-enriched")

    @cached_property
    def content_as_xml(self) -> str:
        """The document XML as a string, as produced by `XML.xml_as_string`."""
        return self._xml.xml_as_string

    @cached_property
    def failed_to_parse(self) -> bool:
        """
        Did this document entirely fail to parse?

        The check is whether the root element's tag contains the substring `error`.

        :return: `True` if there was a complete parser failure, otherwise `False`
        """
        return "error" in self._xml.root_element
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
class CannotPublishUnpublishableDocument(Exception):
    """Publication was attempted on a document that failed the safety checks in `Document.is_publishable`."""
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class DocumentNotSafeForDeletion(Exception):
    """Deletion was attempted on a document which is not safe to delete."""
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Human-readable status labels for a document. Each constant is followed by a bare string
# describing when that status applies.

DOCUMENT_STATUS_HOLD = "On hold"
""" This document has been placed on hold to actively prevent publication. """

DOCUMENT_STATUS_PUBLISHED = "Published"
""" This document has been published and should be considered publicly visible. """

DOCUMENT_STATUS_IN_PROGRESS = "In progress"
""" This document has not been published or put on hold, and has been picked up by an editor and
should be progressing through the document pipeline. """

DOCUMENT_STATUS_NEW = "New"
""" This document isn't published, on hold, or assigned, and can be picked up by an editor in the future. """
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from lxml import etree
|
|
2
|
+
|
|
3
|
+
from caselawclient.xml_helpers import get_xpath_match_string, get_xpath_match_strings
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class NonXMLDocumentError(Exception):
|
|
7
|
+
"""A document cannot be parsed as XML."""
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class XML:
|
|
11
|
+
"""
|
|
12
|
+
A class for interacting with the raw XML of a document.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
def __init__(self, xml_bytestring: bytes):
|
|
16
|
+
"""
|
|
17
|
+
:raises NonXMLDocumentError: This document is not valid XML
|
|
18
|
+
"""
|
|
19
|
+
try:
|
|
20
|
+
self.xml_as_tree: etree.Element = etree.fromstring(xml_bytestring)
|
|
21
|
+
except etree.XMLSyntaxError:
|
|
22
|
+
raise NonXMLDocumentError
|
|
23
|
+
|
|
24
|
+
@property
|
|
25
|
+
def xml_as_string(self) -> str:
|
|
26
|
+
"""
|
|
27
|
+
:return: A string representation of this document's XML tree.
|
|
28
|
+
"""
|
|
29
|
+
return str(etree.tostring(self.xml_as_tree).decode(encoding="utf-8"))
|
|
30
|
+
|
|
31
|
+
@property
|
|
32
|
+
def root_element(self) -> str:
|
|
33
|
+
return str(self.xml_as_tree.tag)
|
|
34
|
+
|
|
35
|
+
def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str]) -> str:
|
|
36
|
+
return get_xpath_match_string(self.xml_as_tree, xpath, namespaces)
|
|
37
|
+
|
|
38
|
+
def get_xpath_match_strings(
|
|
39
|
+
self,
|
|
40
|
+
xpath: str,
|
|
41
|
+
namespaces: dict[str, str],
|
|
42
|
+
) -> list[str]:
|
|
43
|
+
return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
|
|
@@ -2,13 +2,14 @@ import importlib
|
|
|
2
2
|
from functools import cached_property
|
|
3
3
|
from typing import TYPE_CHECKING, Any, Optional
|
|
4
4
|
|
|
5
|
+
from ds_caselaw_utils.types import NeutralCitationString
|
|
6
|
+
|
|
5
7
|
from caselawclient.errors import DocumentNotFoundError
|
|
6
8
|
from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
|
|
7
9
|
|
|
8
10
|
if TYPE_CHECKING:
|
|
9
11
|
from caselawclient.models.press_summaries import PressSummary
|
|
10
12
|
|
|
11
|
-
from ..xml_helpers import get_xpath_match_string
|
|
12
13
|
from .documents import Document
|
|
13
14
|
|
|
14
15
|
|
|
@@ -21,17 +22,18 @@ class Judgment(NeutralCitationMixin, Document):
|
|
|
21
22
|
document_noun_plural = "judgments"
|
|
22
23
|
|
|
23
24
|
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
|
24
|
-
super(
|
|
25
|
+
super().__init__(self.document_noun, *args, **kwargs)
|
|
25
26
|
|
|
26
27
|
@cached_property
|
|
27
|
-
def neutral_citation(self) ->
|
|
28
|
-
return
|
|
29
|
-
self.
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
28
|
+
def neutral_citation(self) -> NeutralCitationString:
|
|
29
|
+
return NeutralCitationString(
|
|
30
|
+
self.body.get_xpath_match_string(
|
|
31
|
+
"/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:cite/text()",
|
|
32
|
+
{
|
|
33
|
+
"uk": "https://caselaw.nationalarchives.gov.uk/akn",
|
|
34
|
+
"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
|
|
35
|
+
},
|
|
36
|
+
)
|
|
35
37
|
)
|
|
36
38
|
|
|
37
39
|
@property
|
|
@@ -1,10 +1,12 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
1
2
|
from functools import cached_property
|
|
2
3
|
from typing import Any
|
|
3
4
|
|
|
4
5
|
from ds_caselaw_utils import neutral_url
|
|
6
|
+
from ds_caselaw_utils.types import NeutralCitationString
|
|
5
7
|
|
|
6
8
|
|
|
7
|
-
class NeutralCitationMixin:
|
|
9
|
+
class NeutralCitationMixin(ABC):
|
|
8
10
|
"""
|
|
9
11
|
A mixin class that provides functionality related to neutral citation.
|
|
10
12
|
|
|
@@ -35,19 +37,13 @@ class NeutralCitationMixin:
|
|
|
35
37
|
super(NeutralCitationMixin, self).__init__(*args, **kwargs)
|
|
36
38
|
|
|
37
39
|
@cached_property
|
|
38
|
-
|
|
39
|
-
|
|
40
|
+
@abstractmethod
|
|
41
|
+
def neutral_citation(self) -> NeutralCitationString: ...
|
|
40
42
|
|
|
41
43
|
@cached_property
|
|
42
44
|
def has_ncn(self) -> bool:
|
|
43
|
-
|
|
44
|
-
return False
|
|
45
|
-
|
|
46
|
-
return True
|
|
45
|
+
return bool(self.neutral_citation)
|
|
47
46
|
|
|
48
47
|
@cached_property
|
|
49
48
|
def has_valid_ncn(self) -> bool:
|
|
50
|
-
|
|
51
|
-
return False
|
|
52
|
-
|
|
53
|
-
return True
|
|
49
|
+
return self.has_ncn and neutral_url(self.neutral_citation) is not None
|
|
@@ -4,9 +4,10 @@ import importlib
|
|
|
4
4
|
from functools import cached_property
|
|
5
5
|
from typing import TYPE_CHECKING, Any, Optional
|
|
6
6
|
|
|
7
|
+
from ds_caselaw_utils.types import NeutralCitationString
|
|
8
|
+
|
|
7
9
|
from caselawclient.errors import DocumentNotFoundError
|
|
8
10
|
from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
|
|
9
|
-
from caselawclient.xml_helpers import get_xpath_match_string
|
|
10
11
|
|
|
11
12
|
from .documents import Document
|
|
12
13
|
|
|
@@ -23,16 +24,17 @@ class PressSummary(NeutralCitationMixin, Document):
|
|
|
23
24
|
document_noun_plural = "press summaries"
|
|
24
25
|
|
|
25
26
|
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
|
26
|
-
super(
|
|
27
|
+
super().__init__(self.document_noun, *args, **kwargs)
|
|
27
28
|
|
|
28
29
|
@cached_property
|
|
29
|
-
def neutral_citation(self) ->
|
|
30
|
-
return
|
|
31
|
-
self.
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
30
|
+
def neutral_citation(self) -> NeutralCitationString:
|
|
31
|
+
return NeutralCitationString(
|
|
32
|
+
self.body.get_xpath_match_string(
|
|
33
|
+
"/akn:akomaNtoso/akn:doc/akn:preface/akn:p/akn:neutralCitation/text()",
|
|
34
|
+
{
|
|
35
|
+
"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
|
|
36
|
+
},
|
|
37
|
+
)
|
|
36
38
|
)
|
|
37
39
|
|
|
38
40
|
@property
|
|
@@ -9,11 +9,7 @@ def parse_string_date_as_utc(iso_string: str, timezone: tzinfo.BaseTzInfo) -> da
|
|
|
9
9
|
ensure that it is converted to a UTC-aware datetime"""
|
|
10
10
|
|
|
11
11
|
mixed_date = isoparse(iso_string)
|
|
12
|
-
|
|
13
|
-
# it is an unaware time
|
|
14
|
-
aware_date = timezone.localize(mixed_date)
|
|
15
|
-
else:
|
|
16
|
-
aware_date = mixed_date
|
|
12
|
+
aware_date = mixed_date if mixed_date.tzinfo else timezone.localize(mixed_date)
|
|
17
13
|
|
|
18
14
|
# make UTC
|
|
19
15
|
utc_date = aware_date.astimezone(UTC)
|
|
@@ -1,16 +1,17 @@
|
|
|
1
|
-
from typing import Any, Optional
|
|
1
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
2
2
|
|
|
3
3
|
import ds_caselaw_utils as caselawutils
|
|
4
|
+
from ds_caselaw_utils.types import NeutralCitationString
|
|
4
5
|
|
|
5
6
|
from caselawclient.errors import MarklogicAPIError
|
|
7
|
+
from caselawclient.models.documents import DocumentURIString
|
|
6
8
|
from caselawclient.models.utilities.aws import copy_assets
|
|
7
9
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
pass
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from caselawclient.Client import MarklogicApiClient
|
|
11
12
|
|
|
12
13
|
|
|
13
|
-
class
|
|
14
|
+
class NeutralCitationToUriError(Exception):
|
|
14
15
|
pass
|
|
15
16
|
|
|
16
17
|
|
|
@@ -18,62 +19,9 @@ class MoveJudgmentError(Exception):
|
|
|
18
19
|
pass
|
|
19
20
|
|
|
20
21
|
|
|
21
|
-
def
|
|
22
|
-
source_uri:
|
|
23
|
-
|
|
24
|
-
api_client: Any,
|
|
25
|
-
) -> str:
|
|
26
|
-
"""Move the document at source_uri on top of the new citation, which must already exist
|
|
27
|
-
Compare to update_document_uri
|
|
28
|
-
|
|
29
|
-
:param source_uri: The URI with the contents of the document to be written. (possibly a failure url)
|
|
30
|
-
:param target_citation: The NCN (implying a URL) whose contents will be overwritten
|
|
31
|
-
:param api_client: An instance of MarklogicApiClient used to make the search request
|
|
32
|
-
:return: The URL associated with the `target_citation`
|
|
33
|
-
"""
|
|
34
|
-
|
|
35
|
-
new_uri: Optional[str] = caselawutils.neutral_url(target_citation.strip())
|
|
36
|
-
|
|
37
|
-
if new_uri == source_uri:
|
|
38
|
-
raise RuntimeError(
|
|
39
|
-
f"Attempted to overwrite document {source_uri} with itself, which is not permitted.",
|
|
40
|
-
)
|
|
41
|
-
if new_uri is None:
|
|
42
|
-
raise NeutralCitationToUriError(
|
|
43
|
-
f"Unable to form new URI for {source_uri} from neutral citation: {target_citation}",
|
|
44
|
-
)
|
|
45
|
-
if not api_client.document_exists(new_uri):
|
|
46
|
-
raise OverwriteJudgmentError(
|
|
47
|
-
f"The URI {new_uri} generated from {target_citation} does not already exist, so cannot be overwritten",
|
|
48
|
-
)
|
|
49
|
-
old_doc = api_client.get_document_by_uri_or_404(source_uri)
|
|
50
|
-
try:
|
|
51
|
-
old_doc_xml = old_doc.content_as_xml
|
|
52
|
-
api_client.update_document_xml(
|
|
53
|
-
new_uri,
|
|
54
|
-
old_doc_xml,
|
|
55
|
-
annotation=f"overwritten from {source_uri}",
|
|
56
|
-
)
|
|
57
|
-
set_metadata(source_uri, new_uri, api_client)
|
|
58
|
-
# TODO: consider deleting existing public assets at that location
|
|
59
|
-
copy_assets(source_uri, new_uri)
|
|
60
|
-
api_client.set_judgment_this_uri(new_uri)
|
|
61
|
-
except MarklogicAPIError as e:
|
|
62
|
-
raise OverwriteJudgmentError(
|
|
63
|
-
f"Failure when attempting to copy judgment from {source_uri} to {new_uri}: {e}",
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
try:
|
|
67
|
-
api_client.delete_judgment(source_uri)
|
|
68
|
-
except MarklogicAPIError as e:
|
|
69
|
-
raise OverwriteJudgmentError(
|
|
70
|
-
f"Failure when attempting to delete judgment from {source_uri}: {e}",
|
|
71
|
-
)
|
|
72
|
-
|
|
73
|
-
return new_uri
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def update_document_uri(source_uri: str, target_citation: str, api_client: Any) -> str:
|
|
22
|
+
def update_document_uri(
|
|
23
|
+
source_uri: DocumentURIString, target_citation: NeutralCitationString, api_client: "MarklogicApiClient"
|
|
24
|
+
) -> DocumentURIString:
|
|
77
25
|
"""
|
|
78
26
|
Move the document at source_uri to the correct location based on the neutral citation
|
|
79
27
|
The new neutral citation *must* not already exist (that is handled elsewhere)
|
|
@@ -83,7 +31,8 @@ def update_document_uri(source_uri: str, target_citation: str, api_client: Any)
|
|
|
83
31
|
:param api_client: An instance of MarklogicApiClient used to make the search request
|
|
84
32
|
:return: The URL associated with the `target_citation`
|
|
85
33
|
"""
|
|
86
|
-
|
|
34
|
+
new_ncn_based_uri = caselawutils.neutral_url(target_citation)
|
|
35
|
+
new_uri: Optional[DocumentURIString] = DocumentURIString(new_ncn_based_uri) if new_ncn_based_uri else None
|
|
87
36
|
if new_uri is None:
|
|
88
37
|
raise NeutralCitationToUriError(
|
|
89
38
|
f"Unable to form new URI for {source_uri} from neutral citation: {target_citation}",
|
|
@@ -115,7 +64,7 @@ def update_document_uri(source_uri: str, target_citation: str, api_client: Any)
|
|
|
115
64
|
return new_uri
|
|
116
65
|
|
|
117
66
|
|
|
118
|
-
def set_metadata(old_uri:
|
|
67
|
+
def set_metadata(old_uri: DocumentURIString, new_uri: DocumentURIString, api_client: Any) -> None:
|
|
119
68
|
source_organisation = api_client.get_property(old_uri, "source-organisation")
|
|
120
69
|
source_name = api_client.get_property(old_uri, "source-name")
|
|
121
70
|
source_email = api_client.get_property(old_uri, "source-email")
|
|
@@ -24,13 +24,13 @@ class SearchResponse:
|
|
|
24
24
|
self.client = client
|
|
25
25
|
|
|
26
26
|
@property
|
|
27
|
-
def total(self) ->
|
|
27
|
+
def total(self) -> int:
|
|
28
28
|
"""
|
|
29
29
|
The total number of search results.
|
|
30
30
|
|
|
31
31
|
:return: The total number of search results
|
|
32
32
|
"""
|
|
33
|
-
return
|
|
33
|
+
return int(
|
|
34
34
|
self.node.xpath("//search:response/@total", namespaces=self.NAMESPACES)[0],
|
|
35
35
|
)
|
|
36
36
|
|
|
@@ -8,6 +8,7 @@ from typing import Dict, Optional
|
|
|
8
8
|
from dateutil import parser as dateparser
|
|
9
9
|
from dateutil.parser import ParserError
|
|
10
10
|
from ds_caselaw_utils.courts import Court, CourtNotFoundException, courts
|
|
11
|
+
from ds_caselaw_utils.types import CourtCode, JurisdictionCode
|
|
11
12
|
from lxml import etree
|
|
12
13
|
|
|
13
14
|
from caselawclient.Client import MarklogicApiClient
|
|
@@ -193,15 +194,16 @@ class SearchResult:
|
|
|
193
194
|
"""
|
|
194
195
|
:return: The court of the search result
|
|
195
196
|
"""
|
|
196
|
-
court = None
|
|
197
|
+
court: Optional[Court] = None
|
|
197
198
|
court_code = self._get_xpath_match_string("search:extracted/uk:court/text()")
|
|
198
199
|
jurisdiction_code = self._get_xpath_match_string(
|
|
199
200
|
"search:extracted/uk:jurisdiction/text()",
|
|
200
201
|
)
|
|
201
202
|
if jurisdiction_code:
|
|
202
|
-
court_code_with_jurisdiction = "%s/%s" % (court_code, jurisdiction_code)
|
|
203
203
|
try:
|
|
204
|
-
court = courts.
|
|
204
|
+
court = courts.get_court_with_jurisdiction_by_code(
|
|
205
|
+
CourtCode(court_code), JurisdictionCode(jurisdiction_code)
|
|
206
|
+
)
|
|
205
207
|
except CourtNotFoundException:
|
|
206
208
|
logging.warning(
|
|
207
209
|
"Court not found with court code %s and jurisdiction code %s for judgment with NCN %s, falling back to court."
|
|
@@ -209,7 +211,7 @@ class SearchResult:
|
|
|
209
211
|
)
|
|
210
212
|
if court is None:
|
|
211
213
|
try:
|
|
212
|
-
court = courts.get_by_code(court_code)
|
|
214
|
+
court = courts.get_by_code(CourtCode(court_code))
|
|
213
215
|
except CourtNotFoundException:
|
|
214
216
|
logging.warning(
|
|
215
217
|
"Court not found with court code %s for judgment with NCN %s, returning None."
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ds-caselaw-marklogic-api-client
|
|
3
|
-
Version:
|
|
3
|
+
Version: 27.0.0
|
|
4
4
|
Summary: An API client for interacting with the underlying data in Find Caselaw.
|
|
5
5
|
Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
|
|
6
6
|
Keywords: national archives,caselaw
|
|
@@ -14,7 +14,7 @@ Requires-Dist: boto3 (>=1.26.112,<2.0.0)
|
|
|
14
14
|
Requires-Dist: certifi (>=2024.8.30,<2024.9.0)
|
|
15
15
|
Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
|
|
16
16
|
Requires-Dist: django-environ (>=0.11.0,<0.12.0)
|
|
17
|
-
Requires-Dist: ds-caselaw-utils (>=
|
|
17
|
+
Requires-Dist: ds-caselaw-utils (>=2.0.0,<3.0.0)
|
|
18
18
|
Requires-Dist: idna (>=3.4,<4.0)
|
|
19
19
|
Requires-Dist: lxml (>=5.0.0,<6.0.0)
|
|
20
20
|
Requires-Dist: memoization (>=0.4.0,<0.5.0)
|
|
@@ -1,22 +1,27 @@
|
|
|
1
|
-
caselawclient/Client.py,sha256=
|
|
1
|
+
caselawclient/Client.py,sha256=9tNNo_-qNxIn5E3OUJLg3apVvUie8VfyRfPmnzeqc-0,40182
|
|
2
2
|
caselawclient/__init__.py,sha256=DY-caubLDQWWingSdsBWgovDNXh8KcnkI6kwz08eIFk,612
|
|
3
3
|
caselawclient/client_helpers/__init__.py,sha256=fyDNKCdrTb2N0Ks23YDhmvlXKfLTHnYQCXhnZb-QQbg,3832
|
|
4
4
|
caselawclient/client_helpers/search_helpers.py,sha256=R99HyRLeYHgsw2L3DOidEqlKLLvs6Tga5rKTuWQViig,1525
|
|
5
5
|
caselawclient/content_hash.py,sha256=0cPC4OoABq0SC2wYFX9-24DodNigeOqksDxgxQH_hUA,2221
|
|
6
6
|
caselawclient/errors.py,sha256=tV0vs3wYSd331BzmfuRiZV6GAdsd91rtN65ymRaSx3s,3164
|
|
7
|
+
caselawclient/factories.py,sha256=nmKJLlTH5tN6sDCBz3abgJdGnzQlaPXIOm8aBgTZTaU,5260
|
|
7
8
|
caselawclient/models/__init__.py,sha256=kd23EUpvaC7aLHdgk8farqKAQEx3lf7RvNT2jEatvlg,68
|
|
8
|
-
caselawclient/models/documents.py,sha256=
|
|
9
|
-
caselawclient/models/
|
|
10
|
-
caselawclient/models/
|
|
11
|
-
caselawclient/models/
|
|
9
|
+
caselawclient/models/documents/__init__.py,sha256=EwfkYMr5HnmdtxijKwU6AV27Y4JAmobdNHL9Cp--bBs,16624
|
|
10
|
+
caselawclient/models/documents/body.py,sha256=0o8qL7oJ40VikNAgqS41phQyB8Jtfz93eK1KyR4k3F0,4791
|
|
11
|
+
caselawclient/models/documents/exceptions.py,sha256=Mz1P8uNqf5w6uLnRwJt6xK7efsVqtd5VA-WXUUH7QLk,285
|
|
12
|
+
caselawclient/models/documents/statuses.py,sha256=Cp4dTQmJOtsU41EJcxy5dV1841pGD2PNWH0VrkDEv4Q,579
|
|
13
|
+
caselawclient/models/documents/xml.py,sha256=afEsgcnTThqW_gKYq-VGtFr4ovOoT2J7h2gXX7F8BbE,1267
|
|
14
|
+
caselawclient/models/judgments.py,sha256=SuCNtOD4LElp37df4dvhaD0umTowioWH0sZNmBgFsoE,1739
|
|
15
|
+
caselawclient/models/neutral_citation_mixin.py,sha256=5ktKCPIDidVRwxVTzx5e242O1BxOdP--1dnatZyTbYI,1773
|
|
16
|
+
caselawclient/models/press_summaries.py,sha256=ZJ5ZhamPTsj6-vO1g96aP_syzUC2RlLMhgr3xnI1PoM,1703
|
|
12
17
|
caselawclient/models/utilities/__init__.py,sha256=aL1a2nDacPxninETeaVZKwOxZemgvm73IcpWgMNXoGc,1100
|
|
13
|
-
caselawclient/models/utilities/aws.py,sha256
|
|
14
|
-
caselawclient/models/utilities/dates.py,sha256=
|
|
15
|
-
caselawclient/models/utilities/move.py,sha256=
|
|
18
|
+
caselawclient/models/utilities/aws.py,sha256=YQeuFdF5NvhUxo3Ejj3PURDlygA73oq2T42ltuQZ6Oo,8073
|
|
19
|
+
caselawclient/models/utilities/dates.py,sha256=WwORxVjUHM1ZFcBF6Qtwo3Cj0sATsnSECkUZ6ls1N1Q,492
|
|
20
|
+
caselawclient/models/utilities/move.py,sha256=Rsx1eGHVjbGz0WMVDjy8b_5t4Ig8aP55sLudL07MVUs,3621
|
|
16
21
|
caselawclient/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
22
|
caselawclient/responses/__init__.py,sha256=2-5NJn_PXPTje_W4dHeHYaNRN6vXK4UcB9eLLNUAKa4,67
|
|
18
|
-
caselawclient/responses/search_response.py,sha256=
|
|
19
|
-
caselawclient/responses/search_result.py,sha256=
|
|
23
|
+
caselawclient/responses/search_response.py,sha256=Z76Zj4VvM-EV_vdiehv2-Jfkr9HZD3SvCTlRrUB_cyE,1951
|
|
24
|
+
caselawclient/responses/search_result.py,sha256=2yR3FP4CQsVymE7RrOMbh1owjYaRTrqkjMObbIkSlhE,8216
|
|
20
25
|
caselawclient/responses/xsl/search_match.xsl,sha256=4Sv--MrwBd7J48E9aI7jlFSXGlNi4dBqgzJ3bdMJ_ZU,1018
|
|
21
26
|
caselawclient/search_parameters.py,sha256=nR-UC1aWZbdXzXBrVDaHECU4Ro8Zi4JZATtgrpAVsKY,3342
|
|
22
27
|
caselawclient/xml_helpers.py,sha256=qmqdGhwrQ-zhvvB-8akwzWnC2uHponKkEnnRExqkx_A,591
|
|
@@ -60,7 +65,7 @@ caselawclient/xquery/validate_document.xqy,sha256=PgaDcnqCRJPIVqfmWsNlXmCLNKd21q
|
|
|
60
65
|
caselawclient/xquery/xslt.xqy,sha256=w57wNijH3dkwHkpKeAxqjlghVflQwo8cq6jS_sm-erM,199
|
|
61
66
|
caselawclient/xquery/xslt_transform.xqy,sha256=smyFFxqmtkuOzBd2l7uw6K2oAsYctudrP8omdv_XNAM,2463
|
|
62
67
|
caselawclient/xquery_type_dicts.py,sha256=YOrXbEYJU84S-YwergCI12OL5Wrn_wpqMcqWpsQrKek,5590
|
|
63
|
-
ds_caselaw_marklogic_api_client-
|
|
64
|
-
ds_caselaw_marklogic_api_client-
|
|
65
|
-
ds_caselaw_marklogic_api_client-
|
|
66
|
-
ds_caselaw_marklogic_api_client-
|
|
68
|
+
ds_caselaw_marklogic_api_client-27.0.0.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
|
|
69
|
+
ds_caselaw_marklogic_api_client-27.0.0.dist-info/METADATA,sha256=miZ3RSM382gJvUf0AH3Xe9Y5yzEqF3r4OkIDj5rXiZg,4189
|
|
70
|
+
ds_caselaw_marklogic_api_client-27.0.0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
|
71
|
+
ds_caselaw_marklogic_api_client-27.0.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|