ds-caselaw-marklogic-api-client 25.0.0__py3-none-any.whl → 26.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- caselawclient/models/{documents.py → documents/__init__.py} +26 -205
- caselawclient/models/documents/body.py +142 -0
- caselawclient/models/documents/exceptions.py +6 -0
- caselawclient/models/documents/statuses.py +12 -0
- caselawclient/models/documents/xml.py +43 -0
- caselawclient/models/judgments.py +1 -3
- caselawclient/models/press_summaries.py +1 -3
- caselawclient/models/utilities/aws.py +2 -0
- {ds_caselaw_marklogic_api_client-25.0.0.dist-info → ds_caselaw_marklogic_api_client-26.0.0.dist-info}/METADATA +1 -1
- {ds_caselaw_marklogic_api_client-25.0.0.dist-info → ds_caselaw_marklogic_api_client-26.0.0.dist-info}/RECORD +12 -8
- {ds_caselaw_marklogic_api_client-25.0.0.dist-info → ds_caselaw_marklogic_api_client-26.0.0.dist-info}/LICENSE.md +0 -0
- {ds_caselaw_marklogic_api_client-25.0.0.dist-info → ds_caselaw_marklogic_api_client-26.0.0.dist-info}/WHEEL +0 -0
|
@@ -1,27 +1,21 @@
|
|
|
1
1
|
import datetime
|
|
2
2
|
import warnings
|
|
3
3
|
from functools import cached_property
|
|
4
|
-
from typing import TYPE_CHECKING, Any,
|
|
4
|
+
from typing import TYPE_CHECKING, Any, NewType, Optional
|
|
5
5
|
|
|
6
|
-
import pytz
|
|
7
6
|
from ds_caselaw_utils import courts
|
|
8
7
|
from ds_caselaw_utils.courts import CourtNotFoundException
|
|
9
|
-
from lxml import etree
|
|
10
8
|
from lxml import html as html_parser
|
|
11
9
|
from requests_toolbelt.multipart import decoder
|
|
12
10
|
|
|
13
|
-
from caselawclient.
|
|
14
|
-
from caselawclient.models.utilities.dates import parse_string_date_as_utc
|
|
15
|
-
|
|
16
|
-
from ..errors import (
|
|
11
|
+
from caselawclient.errors import (
|
|
17
12
|
DocumentNotFoundError,
|
|
18
13
|
GatewayTimeoutError,
|
|
19
14
|
NotSupportedOnVersion,
|
|
20
15
|
OnlySupportedOnVersion,
|
|
21
16
|
)
|
|
22
|
-
from
|
|
23
|
-
from .utilities import
|
|
24
|
-
from .utilities.aws import (
|
|
17
|
+
from caselawclient.models.utilities import VersionsDict, extract_version, render_versions
|
|
18
|
+
from caselawclient.models.utilities.aws import (
|
|
25
19
|
ParserInstructionsDict,
|
|
26
20
|
announce_document_event,
|
|
27
21
|
check_docx_exists,
|
|
@@ -34,31 +28,17 @@ from .utilities.aws import (
|
|
|
34
28
|
uri_for_s3,
|
|
35
29
|
)
|
|
36
30
|
|
|
37
|
-
|
|
38
|
-
|
|
31
|
+
from .body import DocumentBody
|
|
32
|
+
from .exceptions import CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion
|
|
33
|
+
from .statuses import DOCUMENT_STATUS_HOLD, DOCUMENT_STATUS_IN_PROGRESS, DOCUMENT_STATUS_NEW, DOCUMENT_STATUS_PUBLISHED
|
|
39
34
|
|
|
40
|
-
|
|
41
|
-
pass
|
|
35
|
+
MINIMUM_ENRICHMENT_TIME = datetime.timedelta(minutes=20)
|
|
42
36
|
|
|
43
37
|
|
|
44
38
|
class GatewayTimeoutGettingHTMLWithQuery(RuntimeWarning):
|
|
45
39
|
pass
|
|
46
40
|
|
|
47
41
|
|
|
48
|
-
DOCUMENT_STATUS_HOLD = "On hold"
|
|
49
|
-
""" This document has been placed on hold to actively prevent publication. """
|
|
50
|
-
|
|
51
|
-
DOCUMENT_STATUS_PUBLISHED = "Published"
|
|
52
|
-
""" This document has been published and should be considered publicly visible. """
|
|
53
|
-
|
|
54
|
-
DOCUMENT_STATUS_IN_PROGRESS = "In progress"
|
|
55
|
-
""" This document has not been published or put on hold, and has been picked up by an editor and
|
|
56
|
-
should be progressing through the document pipeline. """
|
|
57
|
-
|
|
58
|
-
DOCUMENT_STATUS_NEW = "New"
|
|
59
|
-
""" This document isn't published, on hold, or assigned, and can be picked up by an editor in the future. """
|
|
60
|
-
|
|
61
|
-
|
|
62
42
|
DOCUMENT_COLLECTION_URI_JUDGMENT = "judgment"
|
|
63
43
|
DOCUMENT_COLLECTION_URI_PRESS_SUMMARY = "press-summary"
|
|
64
44
|
|
|
@@ -67,19 +47,6 @@ if TYPE_CHECKING:
|
|
|
67
47
|
|
|
68
48
|
|
|
69
49
|
DocumentURIString = NewType("DocumentURIString", str)
|
|
70
|
-
CourtIdentifierString = NewType("CourtIdentifierString", str)
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
class CannotPublishUnpublishableDocument(Exception):
|
|
74
|
-
"""A document which has failed publication safety checks in `Document.is_publishable` cannot be published."""
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
class DocumentNotSafeForDeletion(Exception):
|
|
78
|
-
"""A document which is not safe for deletion cannot be deleted."""
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
class NonXMLDocumentError(Exception):
|
|
82
|
-
"""A document cannot be parsed as XML."""
|
|
83
50
|
|
|
84
51
|
|
|
85
52
|
class Document:
|
|
@@ -96,7 +63,7 @@ class Document:
|
|
|
96
63
|
|
|
97
64
|
attributes_to_validate: list[tuple[str, bool, str]] = [
|
|
98
65
|
(
|
|
99
|
-
"
|
|
66
|
+
"is_failure",
|
|
100
67
|
False,
|
|
101
68
|
"This document failed to parse",
|
|
102
69
|
),
|
|
@@ -143,20 +110,18 @@ class Document:
|
|
|
143
110
|
|
|
144
111
|
:raises DocumentNotFoundError: The document does not exist within MarkLogic
|
|
145
112
|
"""
|
|
146
|
-
self.uri = DocumentURIString(uri.strip("/"))
|
|
147
|
-
self.api_client = api_client
|
|
113
|
+
self.uri: DocumentURIString = DocumentURIString(uri.strip("/"))
|
|
114
|
+
self.api_client: MarklogicApiClient = api_client
|
|
148
115
|
if not self.document_exists():
|
|
149
116
|
raise DocumentNotFoundError(f"Document {self.uri} does not exist")
|
|
150
117
|
|
|
151
|
-
self.
|
|
152
|
-
xml_bytestring=self.api_client.get_judgment_xml_bytestring(
|
|
153
|
-
self.uri,
|
|
154
|
-
show_unpublished=True,
|
|
155
|
-
),
|
|
118
|
+
self.body: DocumentBody = DocumentBody(
|
|
119
|
+
xml_bytestring=self.api_client.get_judgment_xml_bytestring(self.uri, show_unpublished=True),
|
|
156
120
|
)
|
|
121
|
+
""" `Document.body` represents the XML of the document itself, without any information such as version tracking or properties. """
|
|
157
122
|
|
|
158
123
|
def __repr__(self) -> str:
|
|
159
|
-
name = self.name or "un-named"
|
|
124
|
+
name = self.body.name or "un-named"
|
|
160
125
|
return f"<{self.document_noun} {self.uri}: {name}>"
|
|
161
126
|
|
|
162
127
|
def document_exists(self) -> bool:
|
|
@@ -186,104 +151,6 @@ class Document:
|
|
|
186
151
|
"""
|
|
187
152
|
return f"https://caselaw.nationalarchives.gov.uk/{self.uri}"
|
|
188
153
|
|
|
189
|
-
@cached_property
|
|
190
|
-
def name(self) -> str:
|
|
191
|
-
return self.xml.get_xpath_match_string(
|
|
192
|
-
"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value",
|
|
193
|
-
{"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
|
|
194
|
-
)
|
|
195
|
-
|
|
196
|
-
@cached_property
|
|
197
|
-
def court(self) -> str:
|
|
198
|
-
return self.xml.get_xpath_match_string(
|
|
199
|
-
"/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()",
|
|
200
|
-
{
|
|
201
|
-
"uk": "https://caselaw.nationalarchives.gov.uk/akn",
|
|
202
|
-
"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
|
|
203
|
-
},
|
|
204
|
-
)
|
|
205
|
-
|
|
206
|
-
@cached_property
|
|
207
|
-
def jurisdiction(self) -> str:
|
|
208
|
-
return self.xml.get_xpath_match_string(
|
|
209
|
-
"/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()",
|
|
210
|
-
{
|
|
211
|
-
"uk": "https://caselaw.nationalarchives.gov.uk/akn",
|
|
212
|
-
"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
|
|
213
|
-
},
|
|
214
|
-
)
|
|
215
|
-
|
|
216
|
-
@property
|
|
217
|
-
def court_and_jurisdiction_identifier_string(self) -> CourtIdentifierString:
|
|
218
|
-
if self.jurisdiction != "":
|
|
219
|
-
return CourtIdentifierString("/".join((self.court, self.jurisdiction)))
|
|
220
|
-
return CourtIdentifierString(self.court)
|
|
221
|
-
|
|
222
|
-
@cached_property
|
|
223
|
-
def document_date_as_string(self) -> str:
|
|
224
|
-
return self.xml.get_xpath_match_string(
|
|
225
|
-
"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRdate/@date",
|
|
226
|
-
{"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
|
|
227
|
-
)
|
|
228
|
-
|
|
229
|
-
@cached_property
|
|
230
|
-
def document_date_as_date(self) -> Optional[datetime.date]:
|
|
231
|
-
if not self.document_date_as_string:
|
|
232
|
-
return None
|
|
233
|
-
try:
|
|
234
|
-
return datetime.datetime.strptime(
|
|
235
|
-
self.document_date_as_string,
|
|
236
|
-
"%Y-%m-%d",
|
|
237
|
-
).date()
|
|
238
|
-
except ValueError:
|
|
239
|
-
warnings.warn(
|
|
240
|
-
f"Unparsable date encountered: {self.document_date_as_string}",
|
|
241
|
-
UnparsableDate,
|
|
242
|
-
)
|
|
243
|
-
return None
|
|
244
|
-
|
|
245
|
-
def get_manifestation_datetimes(
|
|
246
|
-
self,
|
|
247
|
-
name: Optional[str] = None,
|
|
248
|
-
) -> list[datetime.datetime]:
|
|
249
|
-
name_filter = f"[@name='{name}']" if name else ""
|
|
250
|
-
iso_datetimes = self.xml.get_xpath_match_strings(
|
|
251
|
-
"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRManifestation"
|
|
252
|
-
f"/akn:FRBRdate{name_filter}/@date",
|
|
253
|
-
{"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
|
|
254
|
-
)
|
|
255
|
-
|
|
256
|
-
return [parse_string_date_as_utc(event, pytz.UTC) for event in iso_datetimes]
|
|
257
|
-
|
|
258
|
-
def get_latest_manifestation_datetime(
|
|
259
|
-
self,
|
|
260
|
-
name: Optional[str] = None,
|
|
261
|
-
) -> Optional[datetime.datetime]:
|
|
262
|
-
events = self.get_manifestation_datetimes(name)
|
|
263
|
-
if not events:
|
|
264
|
-
return None
|
|
265
|
-
return max(events)
|
|
266
|
-
|
|
267
|
-
def get_latest_manifestation_type(self) -> Optional[str]:
|
|
268
|
-
return max(
|
|
269
|
-
(
|
|
270
|
-
(type, time)
|
|
271
|
-
for type in ["transform", "tna-enriched"]
|
|
272
|
-
if (time := self.get_latest_manifestation_datetime(type))
|
|
273
|
-
),
|
|
274
|
-
key=lambda x: x[1],
|
|
275
|
-
)[0]
|
|
276
|
-
|
|
277
|
-
@cached_property
|
|
278
|
-
def transformation_datetime(self) -> Optional[datetime.datetime]:
|
|
279
|
-
"""When was this document successfully parsed or reparsed (date from XML)"""
|
|
280
|
-
return self.get_latest_manifestation_datetime("transform")
|
|
281
|
-
|
|
282
|
-
@cached_property
|
|
283
|
-
def enrichment_datetime(self) -> Optional[datetime.datetime]:
|
|
284
|
-
"""When was this document successfully enriched (date from XML)"""
|
|
285
|
-
return self.get_latest_manifestation_datetime("tna-enriched")
|
|
286
|
-
|
|
287
154
|
@cached_property
|
|
288
155
|
def is_published(self) -> bool:
|
|
289
156
|
return self.api_client.get_published(self.uri)
|
|
@@ -372,10 +239,6 @@ class Document:
|
|
|
372
239
|
"Is this document a potentially historic version of a document, or is it the main document itself?"
|
|
373
240
|
return extract_version(self.uri) != 0
|
|
374
241
|
|
|
375
|
-
@cached_property
|
|
376
|
-
def content_as_xml(self) -> str:
|
|
377
|
-
return self.xml.xml_as_string
|
|
378
|
-
|
|
379
242
|
def content_as_html(
|
|
380
243
|
self,
|
|
381
244
|
version_uri: Optional[DocumentURIString] = None,
|
|
@@ -418,7 +281,7 @@ class Document:
|
|
|
418
281
|
|
|
419
282
|
:return: `True` if this document is in a 'failure' state, otherwise `False`
|
|
420
283
|
"""
|
|
421
|
-
if self.failed_to_parse:
|
|
284
|
+
if self.body.failed_to_parse:
|
|
422
285
|
return True
|
|
423
286
|
return False
|
|
424
287
|
|
|
@@ -428,20 +291,9 @@ class Document:
|
|
|
428
291
|
return True
|
|
429
292
|
return False
|
|
430
293
|
|
|
431
|
-
@cached_property
|
|
432
|
-
def failed_to_parse(self) -> bool:
|
|
433
|
-
"""
|
|
434
|
-
Did this document entirely fail to parse?
|
|
435
|
-
|
|
436
|
-
:return: `True` if there was a complete parser failure, otherwise `False`
|
|
437
|
-
"""
|
|
438
|
-
if "error" in self.xml.root_element:
|
|
439
|
-
return True
|
|
440
|
-
return False
|
|
441
|
-
|
|
442
294
|
@cached_property
|
|
443
295
|
def has_name(self) -> bool:
|
|
444
|
-
if not self.name:
|
|
296
|
+
if not self.body.name:
|
|
445
297
|
return False
|
|
446
298
|
|
|
447
299
|
return True
|
|
@@ -450,7 +302,7 @@ class Document:
|
|
|
450
302
|
def has_valid_court(self) -> bool:
|
|
451
303
|
try:
|
|
452
304
|
return bool(
|
|
453
|
-
courts.get_by_code(self.court_and_jurisdiction_identifier_string),
|
|
305
|
+
courts.get_by_code(self.body.court_and_jurisdiction_identifier_string),
|
|
454
306
|
)
|
|
455
307
|
except CourtNotFoundException:
|
|
456
308
|
return False
|
|
@@ -531,7 +383,7 @@ class Document:
|
|
|
531
383
|
Has this document been enriched recently?
|
|
532
384
|
"""
|
|
533
385
|
|
|
534
|
-
last_enrichment = self.enrichment_datetime
|
|
386
|
+
last_enrichment = self.body.enrichment_datetime
|
|
535
387
|
if not last_enrichment:
|
|
536
388
|
return False
|
|
537
389
|
|
|
@@ -612,7 +464,11 @@ class Document:
|
|
|
612
464
|
self.api_client.set_property(self.uri, "last_sent_to_parser", now.isoformat())
|
|
613
465
|
|
|
614
466
|
parser_type_noun = {"judgment": "judgment", "press summary": "pressSummary"}[self.document_noun]
|
|
615
|
-
checked_date =
|
|
467
|
+
checked_date: Optional[str] = (
|
|
468
|
+
self.body.document_date_as_date.isoformat()
|
|
469
|
+
if self.body.document_date_as_date and self.body.document_date_as_date > datetime.date(1001, 1, 1)
|
|
470
|
+
else None
|
|
471
|
+
)
|
|
616
472
|
|
|
617
473
|
# the keys of parser_instructions should exactly match the parser output
|
|
618
474
|
# in the *-metadata.json files by the parser. Whilst typically empty
|
|
@@ -621,9 +477,9 @@ class Document:
|
|
|
621
477
|
parser_instructions: ParserInstructionsDict = {
|
|
622
478
|
"documentType": parser_type_noun,
|
|
623
479
|
"metadata": {
|
|
624
|
-
"name": self.name or None,
|
|
480
|
+
"name": self.body.name or None,
|
|
625
481
|
"cite": self.best_human_identifier or None,
|
|
626
|
-
"court": self.court or None,
|
|
482
|
+
"court": self.body.court or None,
|
|
627
483
|
"date": checked_date,
|
|
628
484
|
"uri": self.uri,
|
|
629
485
|
},
|
|
@@ -653,38 +509,3 @@ class Document:
|
|
|
653
509
|
if self.docx_exists():
|
|
654
510
|
return True
|
|
655
511
|
return False
|
|
656
|
-
|
|
657
|
-
class XML:
|
|
658
|
-
"""
|
|
659
|
-
Represents the XML of a document, and should contain all methods for interacting with it.
|
|
660
|
-
"""
|
|
661
|
-
|
|
662
|
-
def __init__(self, xml_bytestring: bytes):
|
|
663
|
-
"""
|
|
664
|
-
:raises NonXMLDocumentError: This document is not valid XML
|
|
665
|
-
"""
|
|
666
|
-
try:
|
|
667
|
-
self.xml_as_tree: etree.Element = etree.fromstring(xml_bytestring)
|
|
668
|
-
except etree.XMLSyntaxError:
|
|
669
|
-
raise NonXMLDocumentError
|
|
670
|
-
|
|
671
|
-
@property
|
|
672
|
-
def xml_as_string(self) -> str:
|
|
673
|
-
"""
|
|
674
|
-
:return: A string representation of this document's XML tree.
|
|
675
|
-
"""
|
|
676
|
-
return str(etree.tostring(self.xml_as_tree).decode(encoding="utf-8"))
|
|
677
|
-
|
|
678
|
-
@property
|
|
679
|
-
def root_element(self) -> str:
|
|
680
|
-
return str(self.xml_as_tree.tag)
|
|
681
|
-
|
|
682
|
-
def get_xpath_match_string(self, xpath: str, namespaces: Dict[str, str]) -> str:
|
|
683
|
-
return get_xpath_match_string(self.xml_as_tree, xpath, namespaces)
|
|
684
|
-
|
|
685
|
-
def get_xpath_match_strings(
|
|
686
|
-
self,
|
|
687
|
-
xpath: str,
|
|
688
|
-
namespaces: Dict[str, str],
|
|
689
|
-
) -> list[str]:
|
|
690
|
-
return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import warnings
|
|
3
|
+
from functools import cached_property
|
|
4
|
+
from typing import NewType, Optional
|
|
5
|
+
|
|
6
|
+
import pytz
|
|
7
|
+
|
|
8
|
+
from caselawclient.models.utilities.dates import parse_string_date_as_utc
|
|
9
|
+
|
|
10
|
+
from .xml import XML
|
|
11
|
+
|
|
12
|
+
CourtIdentifierString = NewType("CourtIdentifierString", str)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class UnparsableDate(Warning):
|
|
16
|
+
pass
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class DocumentBody:
|
|
20
|
+
"""
|
|
21
|
+
A class for abstracting out interactions with the body of a document.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
def __init__(self, xml_bytestring: bytes):
|
|
25
|
+
self._xml = XML(xml_bytestring=xml_bytestring)
|
|
26
|
+
""" This is an instance of the `Document.XML` class for manipulation of the XML document itself. """
|
|
27
|
+
|
|
28
|
+
def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str]) -> str:
|
|
29
|
+
return self._xml.get_xpath_match_string(xpath, namespaces)
|
|
30
|
+
|
|
31
|
+
@cached_property
|
|
32
|
+
def name(self) -> str:
|
|
33
|
+
return self._xml.get_xpath_match_string(
|
|
34
|
+
"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value",
|
|
35
|
+
{"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
@cached_property
|
|
39
|
+
def court(self) -> str:
|
|
40
|
+
return self._xml.get_xpath_match_string(
|
|
41
|
+
"/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()",
|
|
42
|
+
{
|
|
43
|
+
"uk": "https://caselaw.nationalarchives.gov.uk/akn",
|
|
44
|
+
"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
|
|
45
|
+
},
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
@cached_property
|
|
49
|
+
def jurisdiction(self) -> str:
|
|
50
|
+
return self._xml.get_xpath_match_string(
|
|
51
|
+
"/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()",
|
|
52
|
+
{
|
|
53
|
+
"uk": "https://caselaw.nationalarchives.gov.uk/akn",
|
|
54
|
+
"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
|
|
55
|
+
},
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
@property
|
|
59
|
+
def court_and_jurisdiction_identifier_string(self) -> CourtIdentifierString:
|
|
60
|
+
if self.jurisdiction != "":
|
|
61
|
+
return CourtIdentifierString("/".join((self.court, self.jurisdiction)))
|
|
62
|
+
return CourtIdentifierString(self.court)
|
|
63
|
+
|
|
64
|
+
@cached_property
|
|
65
|
+
def document_date_as_string(self) -> str:
|
|
66
|
+
return self._xml.get_xpath_match_string(
|
|
67
|
+
"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRdate/@date",
|
|
68
|
+
{"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
@cached_property
|
|
72
|
+
def document_date_as_date(self) -> Optional[datetime.date]:
|
|
73
|
+
if not self.document_date_as_string:
|
|
74
|
+
return None
|
|
75
|
+
try:
|
|
76
|
+
return datetime.datetime.strptime(
|
|
77
|
+
self.document_date_as_string,
|
|
78
|
+
"%Y-%m-%d",
|
|
79
|
+
).date()
|
|
80
|
+
except ValueError:
|
|
81
|
+
warnings.warn(
|
|
82
|
+
f"Unparsable date encountered: {self.document_date_as_string}",
|
|
83
|
+
UnparsableDate,
|
|
84
|
+
)
|
|
85
|
+
return None
|
|
86
|
+
|
|
87
|
+
def get_manifestation_datetimes(
|
|
88
|
+
self,
|
|
89
|
+
name: Optional[str] = None,
|
|
90
|
+
) -> list[datetime.datetime]:
|
|
91
|
+
name_filter = f"[@name='{name}']" if name else ""
|
|
92
|
+
iso_datetimes = self._xml.get_xpath_match_strings(
|
|
93
|
+
"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRManifestation"
|
|
94
|
+
f"/akn:FRBRdate{name_filter}/@date",
|
|
95
|
+
{"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
return [parse_string_date_as_utc(event, pytz.UTC) for event in iso_datetimes]
|
|
99
|
+
|
|
100
|
+
def get_latest_manifestation_datetime(
|
|
101
|
+
self,
|
|
102
|
+
name: Optional[str] = None,
|
|
103
|
+
) -> Optional[datetime.datetime]:
|
|
104
|
+
events = self.get_manifestation_datetimes(name)
|
|
105
|
+
if not events:
|
|
106
|
+
return None
|
|
107
|
+
return max(events)
|
|
108
|
+
|
|
109
|
+
def get_latest_manifestation_type(self) -> Optional[str]:
|
|
110
|
+
return max(
|
|
111
|
+
(
|
|
112
|
+
(type, time)
|
|
113
|
+
for type in ["transform", "tna-enriched"]
|
|
114
|
+
if (time := self.get_latest_manifestation_datetime(type))
|
|
115
|
+
),
|
|
116
|
+
key=lambda x: x[1],
|
|
117
|
+
)[0]
|
|
118
|
+
|
|
119
|
+
@cached_property
|
|
120
|
+
def transformation_datetime(self) -> Optional[datetime.datetime]:
|
|
121
|
+
"""When was this document successfully parsed or reparsed (date from XML)"""
|
|
122
|
+
return self.get_latest_manifestation_datetime("transform")
|
|
123
|
+
|
|
124
|
+
@cached_property
|
|
125
|
+
def enrichment_datetime(self) -> Optional[datetime.datetime]:
|
|
126
|
+
"""When was this document successfully enriched (date from XML)"""
|
|
127
|
+
return self.get_latest_manifestation_datetime("tna-enriched")
|
|
128
|
+
|
|
129
|
+
@cached_property
|
|
130
|
+
def content_as_xml(self) -> str:
|
|
131
|
+
return self._xml.xml_as_string
|
|
132
|
+
|
|
133
|
+
@cached_property
|
|
134
|
+
def failed_to_parse(self) -> bool:
|
|
135
|
+
"""
|
|
136
|
+
Did this document entirely fail to parse?
|
|
137
|
+
|
|
138
|
+
:return: `True` if there was a complete parser failure, otherwise `False`
|
|
139
|
+
"""
|
|
140
|
+
if "error" in self._xml.root_element:
|
|
141
|
+
return True
|
|
142
|
+
return False
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
class CannotPublishUnpublishableDocument(Exception):
|
|
2
|
+
"""A document which has failed publication safety checks in `Document.is_publishable` cannot be published."""
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class DocumentNotSafeForDeletion(Exception):
|
|
6
|
+
"""A document which is not safe for deletion cannot be deleted."""
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
DOCUMENT_STATUS_HOLD = "On hold"
|
|
2
|
+
""" This document has been placed on hold to actively prevent publication. """
|
|
3
|
+
|
|
4
|
+
DOCUMENT_STATUS_PUBLISHED = "Published"
|
|
5
|
+
""" This document has been published and should be considered publicly visible. """
|
|
6
|
+
|
|
7
|
+
DOCUMENT_STATUS_IN_PROGRESS = "In progress"
|
|
8
|
+
""" This document has not been published or put on hold, and has been picked up by an editor and
|
|
9
|
+
should be progressing through the document pipeline. """
|
|
10
|
+
|
|
11
|
+
DOCUMENT_STATUS_NEW = "New"
|
|
12
|
+
""" This document isn't published, on hold, or assigned, and can be picked up by an editor in the future. """
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from lxml import etree
|
|
2
|
+
|
|
3
|
+
from caselawclient.xml_helpers import get_xpath_match_string, get_xpath_match_strings
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class NonXMLDocumentError(Exception):
|
|
7
|
+
"""A document cannot be parsed as XML."""
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class XML:
|
|
11
|
+
"""
|
|
12
|
+
A class for interacting with the raw XML of a document.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
def __init__(self, xml_bytestring: bytes):
|
|
16
|
+
"""
|
|
17
|
+
:raises NonXMLDocumentError: This document is not valid XML
|
|
18
|
+
"""
|
|
19
|
+
try:
|
|
20
|
+
self.xml_as_tree: etree.Element = etree.fromstring(xml_bytestring)
|
|
21
|
+
except etree.XMLSyntaxError:
|
|
22
|
+
raise NonXMLDocumentError
|
|
23
|
+
|
|
24
|
+
@property
|
|
25
|
+
def xml_as_string(self) -> str:
|
|
26
|
+
"""
|
|
27
|
+
:return: A string representation of this document's XML tree.
|
|
28
|
+
"""
|
|
29
|
+
return str(etree.tostring(self.xml_as_tree).decode(encoding="utf-8"))
|
|
30
|
+
|
|
31
|
+
@property
|
|
32
|
+
def root_element(self) -> str:
|
|
33
|
+
return str(self.xml_as_tree.tag)
|
|
34
|
+
|
|
35
|
+
def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str]) -> str:
|
|
36
|
+
return get_xpath_match_string(self.xml_as_tree, xpath, namespaces)
|
|
37
|
+
|
|
38
|
+
def get_xpath_match_strings(
|
|
39
|
+
self,
|
|
40
|
+
xpath: str,
|
|
41
|
+
namespaces: dict[str, str],
|
|
42
|
+
) -> list[str]:
|
|
43
|
+
return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
|
|
@@ -8,7 +8,6 @@ from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
|
|
|
8
8
|
if TYPE_CHECKING:
|
|
9
9
|
from caselawclient.models.press_summaries import PressSummary
|
|
10
10
|
|
|
11
|
-
from ..xml_helpers import get_xpath_match_string
|
|
12
11
|
from .documents import Document
|
|
13
12
|
|
|
14
13
|
|
|
@@ -25,8 +24,7 @@ class Judgment(NeutralCitationMixin, Document):
|
|
|
25
24
|
|
|
26
25
|
@cached_property
|
|
27
26
|
def neutral_citation(self) -> str:
|
|
28
|
-
return get_xpath_match_string(
|
|
29
|
-
self.xml.xml_as_tree,
|
|
27
|
+
return self.body.get_xpath_match_string(
|
|
30
28
|
"/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:cite/text()",
|
|
31
29
|
{
|
|
32
30
|
"uk": "https://caselaw.nationalarchives.gov.uk/akn",
|
|
@@ -6,7 +6,6 @@ from typing import TYPE_CHECKING, Any, Optional
|
|
|
6
6
|
|
|
7
7
|
from caselawclient.errors import DocumentNotFoundError
|
|
8
8
|
from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
|
|
9
|
-
from caselawclient.xml_helpers import get_xpath_match_string
|
|
10
9
|
|
|
11
10
|
from .documents import Document
|
|
12
11
|
|
|
@@ -27,8 +26,7 @@ class PressSummary(NeutralCitationMixin, Document):
|
|
|
27
26
|
|
|
28
27
|
@cached_property
|
|
29
28
|
def neutral_citation(self) -> str:
|
|
30
|
-
return get_xpath_match_string(
|
|
31
|
-
self.xml.xml_as_tree,
|
|
29
|
+
return self.body.get_xpath_match_string(
|
|
32
30
|
"/akn:akomaNtoso/akn:doc/akn:preface/akn:p/akn:neutralCitation/text()",
|
|
33
31
|
{
|
|
34
32
|
"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ds-caselaw-marklogic-api-client
|
|
3
|
-
Version:
|
|
3
|
+
Version: 26.0.0
|
|
4
4
|
Summary: An API client for interacting with the underlying data in Find Caselaw.
|
|
5
5
|
Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
|
|
6
6
|
Keywords: national archives,caselaw
|
|
@@ -5,12 +5,16 @@ caselawclient/client_helpers/search_helpers.py,sha256=R99HyRLeYHgsw2L3DOidEqlKLL
|
|
|
5
5
|
caselawclient/content_hash.py,sha256=0cPC4OoABq0SC2wYFX9-24DodNigeOqksDxgxQH_hUA,2221
|
|
6
6
|
caselawclient/errors.py,sha256=tV0vs3wYSd331BzmfuRiZV6GAdsd91rtN65ymRaSx3s,3164
|
|
7
7
|
caselawclient/models/__init__.py,sha256=kd23EUpvaC7aLHdgk8farqKAQEx3lf7RvNT2jEatvlg,68
|
|
8
|
-
caselawclient/models/documents.py,sha256=
|
|
9
|
-
caselawclient/models/
|
|
8
|
+
caselawclient/models/documents/__init__.py,sha256=QAff1Dfh6ma2tjUntYTJk4yy1PNp_pjy3TB1Yn04Hio,16919
|
|
9
|
+
caselawclient/models/documents/body.py,sha256=PWKljGNV4LCJXDT9nBbpVLrumvXVQWne8OIRULbikao,4896
|
|
10
|
+
caselawclient/models/documents/exceptions.py,sha256=Mz1P8uNqf5w6uLnRwJt6xK7efsVqtd5VA-WXUUH7QLk,285
|
|
11
|
+
caselawclient/models/documents/statuses.py,sha256=Cp4dTQmJOtsU41EJcxy5dV1841pGD2PNWH0VrkDEv4Q,579
|
|
12
|
+
caselawclient/models/documents/xml.py,sha256=afEsgcnTThqW_gKYq-VGtFr4ovOoT2J7h2gXX7F8BbE,1267
|
|
13
|
+
caselawclient/models/judgments.py,sha256=bZMJl4XWft1TuHE_T7_LZrHzDOHQzJkggSqtsOW0vcg,1608
|
|
10
14
|
caselawclient/models/neutral_citation_mixin.py,sha256=cEQXq1bWaiu3Sy3QWyK5IesrMS186Kw5-CFfuVqnB-A,1730
|
|
11
|
-
caselawclient/models/press_summaries.py,sha256=
|
|
15
|
+
caselawclient/models/press_summaries.py,sha256=Lk6Oc3jJlifDaYNbhrxPn7jghBEKGXi1sK5kOwbefho,1580
|
|
12
16
|
caselawclient/models/utilities/__init__.py,sha256=aL1a2nDacPxninETeaVZKwOxZemgvm73IcpWgMNXoGc,1100
|
|
13
|
-
caselawclient/models/utilities/aws.py,sha256
|
|
17
|
+
caselawclient/models/utilities/aws.py,sha256=YQeuFdF5NvhUxo3Ejj3PURDlygA73oq2T42ltuQZ6Oo,8073
|
|
14
18
|
caselawclient/models/utilities/dates.py,sha256=f1ai7WiGELdqdiyFQ7gGjCXFO6sxz5g5hfc2_HRNNvE,563
|
|
15
19
|
caselawclient/models/utilities/move.py,sha256=Xi2uPzruMp_-40Z5pWcSd3AN6C9iiAj6A1YqZHjgeaI,5390
|
|
16
20
|
caselawclient/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -60,7 +64,7 @@ caselawclient/xquery/validate_document.xqy,sha256=PgaDcnqCRJPIVqfmWsNlXmCLNKd21q
|
|
|
60
64
|
caselawclient/xquery/xslt.xqy,sha256=w57wNijH3dkwHkpKeAxqjlghVflQwo8cq6jS_sm-erM,199
|
|
61
65
|
caselawclient/xquery/xslt_transform.xqy,sha256=smyFFxqmtkuOzBd2l7uw6K2oAsYctudrP8omdv_XNAM,2463
|
|
62
66
|
caselawclient/xquery_type_dicts.py,sha256=YOrXbEYJU84S-YwergCI12OL5Wrn_wpqMcqWpsQrKek,5590
|
|
63
|
-
ds_caselaw_marklogic_api_client-
|
|
64
|
-
ds_caselaw_marklogic_api_client-
|
|
65
|
-
ds_caselaw_marklogic_api_client-
|
|
66
|
-
ds_caselaw_marklogic_api_client-
|
|
67
|
+
ds_caselaw_marklogic_api_client-26.0.0.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
|
|
68
|
+
ds_caselaw_marklogic_api_client-26.0.0.dist-info/METADATA,sha256=RhoL4Y76-eG36rZsLV7vsT-SVqQlN7OnjMy3Y2dwCWY,4189
|
|
69
|
+
ds_caselaw_marklogic_api_client-26.0.0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
|
70
|
+
ds_caselaw_marklogic_api_client-26.0.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|