ds-caselaw-marklogic-api-client 23.0.1__py3-none-any.whl → 24.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ds-caselaw-marklogic-api-client might be problematic. Click here for more details.
- caselawclient/Client.py +124 -76
- caselawclient/client_helpers/__init__.py +2 -2
- caselawclient/client_helpers/search_helpers.py +5 -3
- caselawclient/content_hash.py +2 -4
- caselawclient/errors.py +3 -6
- caselawclient/models/documents.py +39 -29
- caselawclient/models/judgments.py +1 -4
- caselawclient/models/neutral_citation_mixin.py +12 -15
- caselawclient/models/press_summaries.py +2 -5
- caselawclient/models/utilities/aws.py +9 -12
- caselawclient/models/utilities/dates.py +20 -0
- caselawclient/models/utilities/move.py +11 -10
- caselawclient/responses/search_response.py +4 -5
- caselawclient/responses/search_result.py +11 -10
- caselawclient/xml_helpers.py +3 -1
- caselawclient/xml_tools.py +6 -5
- {ds_caselaw_marklogic_api_client-23.0.1.dist-info → ds_caselaw_marklogic_api_client-24.0.0.dist-info}/METADATA +13 -12
- {ds_caselaw_marklogic_api_client-23.0.1.dist-info → ds_caselaw_marklogic_api_client-24.0.0.dist-info}/RECORD +20 -19
- {ds_caselaw_marklogic_api_client-23.0.1.dist-info → ds_caselaw_marklogic_api_client-24.0.0.dist-info}/LICENSE.md +0 -0
- {ds_caselaw_marklogic_api_client-23.0.1.dist-info → ds_caselaw_marklogic_api_client-24.0.0.dist-info}/WHEEL +0 -0
|
@@ -3,6 +3,7 @@ import warnings
|
|
|
3
3
|
from functools import cached_property
|
|
4
4
|
from typing import TYPE_CHECKING, Any, Dict, NewType, Optional
|
|
5
5
|
|
|
6
|
+
import pytz
|
|
6
7
|
from ds_caselaw_utils import courts
|
|
7
8
|
from ds_caselaw_utils.courts import CourtNotFoundException
|
|
8
9
|
from lxml import etree
|
|
@@ -10,6 +11,7 @@ from lxml import html as html_parser
|
|
|
10
11
|
from requests_toolbelt.multipart import decoder
|
|
11
12
|
|
|
12
13
|
from caselawclient.models.utilities import extract_version
|
|
14
|
+
from caselawclient.models.utilities.dates import parse_string_date_as_utc
|
|
13
15
|
|
|
14
16
|
from ..errors import (
|
|
15
17
|
DocumentNotFoundError,
|
|
@@ -71,20 +73,14 @@ CourtIdentifierString = NewType("CourtIdentifierString", str)
|
|
|
71
73
|
class CannotPublishUnpublishableDocument(Exception):
|
|
72
74
|
"""A document which has failed publication safety checks in `Document.is_publishable` cannot be published."""
|
|
73
75
|
|
|
74
|
-
pass
|
|
75
|
-
|
|
76
76
|
|
|
77
77
|
class DocumentNotSafeForDeletion(Exception):
|
|
78
78
|
"""A document which is not safe for deletion cannot be deleted."""
|
|
79
79
|
|
|
80
|
-
pass
|
|
81
|
-
|
|
82
80
|
|
|
83
81
|
class NonXMLDocumentError(Exception):
|
|
84
82
|
"""A document cannot be parsed as XML."""
|
|
85
83
|
|
|
86
|
-
pass
|
|
87
|
-
|
|
88
84
|
|
|
89
85
|
class Document:
|
|
90
86
|
"""
|
|
@@ -154,8 +150,9 @@ class Document:
|
|
|
154
150
|
|
|
155
151
|
self.xml = self.XML(
|
|
156
152
|
xml_bytestring=self.api_client.get_judgment_xml_bytestring(
|
|
157
|
-
self.uri,
|
|
158
|
-
|
|
153
|
+
self.uri,
|
|
154
|
+
show_unpublished=True,
|
|
155
|
+
),
|
|
159
156
|
)
|
|
160
157
|
|
|
161
158
|
def __repr__(self) -> str:
|
|
@@ -187,7 +184,7 @@ class Document:
|
|
|
187
184
|
"""
|
|
188
185
|
:return: The absolute, public URI at which a copy of this document can be found
|
|
189
186
|
"""
|
|
190
|
-
return "https://caselaw.nationalarchives.gov.uk/{uri}"
|
|
187
|
+
return f"https://caselaw.nationalarchives.gov.uk/{self.uri}"
|
|
191
188
|
|
|
192
189
|
@cached_property
|
|
193
190
|
def name(self) -> str:
|
|
@@ -236,7 +233,8 @@ class Document:
|
|
|
236
233
|
return None
|
|
237
234
|
try:
|
|
238
235
|
return datetime.datetime.strptime(
|
|
239
|
-
self.document_date_as_string,
|
|
236
|
+
self.document_date_as_string,
|
|
237
|
+
"%Y-%m-%d",
|
|
240
238
|
).date()
|
|
241
239
|
except ValueError:
|
|
242
240
|
warnings.warn(
|
|
@@ -246,7 +244,8 @@ class Document:
|
|
|
246
244
|
return None
|
|
247
245
|
|
|
248
246
|
def get_manifestation_datetimes(
|
|
249
|
-
self,
|
|
247
|
+
self,
|
|
248
|
+
name: Optional[str] = None,
|
|
250
249
|
) -> list[datetime.datetime]:
|
|
251
250
|
name_filter = f"[@name='{name}']" if name else ""
|
|
252
251
|
iso_datetimes = self.xml.get_xpath_match_strings(
|
|
@@ -254,10 +253,12 @@ class Document:
|
|
|
254
253
|
f"/akn:FRBRdate{name_filter}/@date",
|
|
255
254
|
{"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
|
|
256
255
|
)
|
|
257
|
-
|
|
256
|
+
|
|
257
|
+
return [parse_string_date_as_utc(event, pytz.UTC) for event in iso_datetimes]
|
|
258
258
|
|
|
259
259
|
def get_latest_manifestation_datetime(
|
|
260
|
-
self,
|
|
260
|
+
self,
|
|
261
|
+
name: Optional[str] = None,
|
|
261
262
|
) -> Optional[datetime.datetime]:
|
|
262
263
|
events = self.get_manifestation_datetimes(name)
|
|
263
264
|
if not events:
|
|
@@ -347,7 +348,7 @@ class Document:
|
|
|
347
348
|
"""
|
|
348
349
|
if self.is_version:
|
|
349
350
|
raise NotSupportedOnVersion(
|
|
350
|
-
"Cannot get versions of a version for {self.uri}"
|
|
351
|
+
"Cannot get versions of a version for {self.uri}",
|
|
351
352
|
)
|
|
352
353
|
docs = []
|
|
353
354
|
for version in self.versions:
|
|
@@ -364,7 +365,7 @@ class Document:
|
|
|
364
365
|
version = extract_version(self.uri)
|
|
365
366
|
if version == 0:
|
|
366
367
|
raise OnlySupportedOnVersion(
|
|
367
|
-
f"Version number requested for {self.uri} which is not a version"
|
|
368
|
+
f"Version number requested for {self.uri} which is not a version",
|
|
368
369
|
)
|
|
369
370
|
return version
|
|
370
371
|
|
|
@@ -384,7 +385,10 @@ class Document:
|
|
|
384
385
|
) -> str:
|
|
385
386
|
try:
|
|
386
387
|
results = self.api_client.eval_xslt(
|
|
387
|
-
self.uri,
|
|
388
|
+
self.uri,
|
|
389
|
+
version_uri,
|
|
390
|
+
show_unpublished=True,
|
|
391
|
+
query=query,
|
|
388
392
|
)
|
|
389
393
|
multipart_data = decoder.MultipartDecoder.from_response(results)
|
|
390
394
|
return str(multipart_data.parts[0].text)
|
|
@@ -449,7 +453,7 @@ class Document:
|
|
|
449
453
|
def has_valid_court(self) -> bool:
|
|
450
454
|
try:
|
|
451
455
|
return bool(
|
|
452
|
-
courts.get_by_code(self.court_and_jurisdiction_identifier_string)
|
|
456
|
+
courts.get_by_code(self.court_and_jurisdiction_identifier_string),
|
|
453
457
|
)
|
|
454
458
|
except CourtNotFoundException:
|
|
455
459
|
return False
|
|
@@ -495,7 +499,9 @@ class Document:
|
|
|
495
499
|
"""
|
|
496
500
|
now = datetime.datetime.now(datetime.timezone.utc)
|
|
497
501
|
self.api_client.set_property(
|
|
498
|
-
self.uri,
|
|
502
|
+
self.uri,
|
|
503
|
+
"last_sent_to_enrichment",
|
|
504
|
+
now.isoformat(),
|
|
499
505
|
)
|
|
500
506
|
|
|
501
507
|
announce_document_event(
|
|
@@ -527,9 +533,13 @@ class Document:
|
|
|
527
533
|
"""
|
|
528
534
|
Has this document been enriched recently?
|
|
529
535
|
"""
|
|
536
|
+
|
|
530
537
|
last_enrichment = self.enrichment_datetime
|
|
538
|
+
if not last_enrichment:
|
|
539
|
+
return False
|
|
540
|
+
|
|
531
541
|
now = datetime.datetime.now(tz=datetime.timezone.utc)
|
|
532
|
-
if
|
|
542
|
+
if now - last_enrichment < MINIMUM_ENRICHMENT_TIME:
|
|
533
543
|
return True
|
|
534
544
|
return False
|
|
535
545
|
|
|
@@ -590,7 +600,7 @@ class Document:
|
|
|
590
600
|
self.api_client.delete_judgment(self.uri)
|
|
591
601
|
delete_documents_from_private_bucket(self.uri)
|
|
592
602
|
else:
|
|
593
|
-
raise DocumentNotSafeForDeletion
|
|
603
|
+
raise DocumentNotSafeForDeletion
|
|
594
604
|
|
|
595
605
|
def overwrite(self, new_citation: str) -> None:
|
|
596
606
|
self.api_client.overwrite_document(self.uri, new_citation)
|
|
@@ -604,14 +614,8 @@ class Document:
|
|
|
604
614
|
now = datetime.datetime.now(datetime.timezone.utc)
|
|
605
615
|
self.api_client.set_property(self.uri, "last_sent_to_parser", now.isoformat())
|
|
606
616
|
|
|
607
|
-
parser_type_noun = {"judgment": "judgment", "press summary": "pressSummary"}[
|
|
608
|
-
|
|
609
|
-
]
|
|
610
|
-
checked_date = (
|
|
611
|
-
self.document_date_as_string
|
|
612
|
-
if self.document_date_as_string > "1001"
|
|
613
|
-
else None
|
|
614
|
-
)
|
|
617
|
+
parser_type_noun = {"judgment": "judgment", "press summary": "pressSummary"}[self.document_noun]
|
|
618
|
+
checked_date = self.document_date_as_string if self.document_date_as_string > "1001" else None
|
|
615
619
|
|
|
616
620
|
# the keys of parser_instructions should exactly match the parser output
|
|
617
621
|
# in the *-metadata.json files by the parser. Whilst typically empty
|
|
@@ -635,6 +639,10 @@ class Document:
|
|
|
635
639
|
)
|
|
636
640
|
|
|
637
641
|
def reparse(self) -> bool:
|
|
642
|
+
# note that we set 'last_sent_to_parser' even if we can't send it to the parser
|
|
643
|
+
# it means 'last tried to reparse' much more consistently.
|
|
644
|
+
now = datetime.datetime.now(datetime.timezone.utc)
|
|
645
|
+
self.api_client.set_property(self.uri, "last_sent_to_parser", now.isoformat())
|
|
638
646
|
if self.can_reparse:
|
|
639
647
|
self.force_reparse()
|
|
640
648
|
return True
|
|
@@ -678,6 +686,8 @@ class Document:
|
|
|
678
686
|
return get_xpath_match_string(self.xml_as_tree, xpath, namespaces)
|
|
679
687
|
|
|
680
688
|
def get_xpath_match_strings(
|
|
681
|
-
self,
|
|
689
|
+
self,
|
|
690
|
+
xpath: str,
|
|
691
|
+
namespaces: Dict[str, str],
|
|
682
692
|
) -> list[str]:
|
|
683
693
|
return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
|
|
@@ -45,10 +45,7 @@ class Judgment(NeutralCitationMixin, Document):
|
|
|
45
45
|
"""
|
|
46
46
|
try:
|
|
47
47
|
uri = self.uri + "/press-summary/1"
|
|
48
|
-
PressSummary = getattr(
|
|
49
|
-
importlib.import_module("caselawclient.models.press_summaries"),
|
|
50
|
-
"PressSummary",
|
|
51
|
-
)
|
|
48
|
+
PressSummary = importlib.import_module("caselawclient.models.press_summaries").PressSummary
|
|
52
49
|
return PressSummary(uri, self.api_client) # type: ignore
|
|
53
50
|
except DocumentNotFoundError:
|
|
54
51
|
return None
|
|
@@ -19,21 +19,18 @@ class NeutralCitationMixin:
|
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
21
|
def __init__(self, document_noun: str, *args: Any, **kwargs: Any) -> None:
|
|
22
|
-
self.attributes_to_validate: list[tuple[str, bool, str]] =
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
),
|
|
35
|
-
]
|
|
36
|
-
)
|
|
22
|
+
self.attributes_to_validate: list[tuple[str, bool, str]] = self.attributes_to_validate + [
|
|
23
|
+
(
|
|
24
|
+
"has_ncn",
|
|
25
|
+
True,
|
|
26
|
+
f"This {document_noun} has no neutral citation number",
|
|
27
|
+
),
|
|
28
|
+
(
|
|
29
|
+
"has_valid_ncn",
|
|
30
|
+
True,
|
|
31
|
+
f"The neutral citation number of this {document_noun} is not valid",
|
|
32
|
+
),
|
|
33
|
+
]
|
|
37
34
|
|
|
38
35
|
super(NeutralCitationMixin, self).__init__(*args, **kwargs)
|
|
39
36
|
|
|
@@ -40,16 +40,13 @@ class PressSummary(NeutralCitationMixin, Document):
|
|
|
40
40
|
return self.neutral_citation
|
|
41
41
|
|
|
42
42
|
@cached_property
|
|
43
|
-
def linked_document(self) -> Optional[
|
|
43
|
+
def linked_document(self) -> Optional[Judgment]:
|
|
44
44
|
"""
|
|
45
45
|
Attempt to fetch a linked judgement, and return it, if it exists
|
|
46
46
|
"""
|
|
47
47
|
try:
|
|
48
48
|
uri = self.uri.removesuffix("/press-summary/1")
|
|
49
|
-
Judgment = getattr(
|
|
50
|
-
importlib.import_module("caselawclient.models.judgments"),
|
|
51
|
-
"Judgment",
|
|
52
|
-
)
|
|
49
|
+
Judgment = importlib.import_module("caselawclient.models.judgments").Judgment
|
|
53
50
|
return Judgment(uri, self.api_client) # type: ignore
|
|
54
51
|
except DocumentNotFoundError:
|
|
55
52
|
return None
|
|
@@ -2,7 +2,7 @@ import datetime
|
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
4
|
import uuid
|
|
5
|
-
from typing import Any, Literal, Optional, TypedDict,
|
|
5
|
+
from typing import Any, Literal, Optional, TypedDict, overload
|
|
6
6
|
|
|
7
7
|
import boto3
|
|
8
8
|
import botocore.client
|
|
@@ -37,7 +37,7 @@ def create_aws_client(service: Literal["s3"]) -> S3Client: ...
|
|
|
37
37
|
def create_aws_client(service: Literal["sns"]) -> SNSClient: ...
|
|
38
38
|
|
|
39
39
|
|
|
40
|
-
def create_aws_client(service:
|
|
40
|
+
def create_aws_client(service: Literal["s3", "sns"]) -> Any:
|
|
41
41
|
aws = boto3.session.Session(
|
|
42
42
|
aws_access_key_id=env("AWS_ACCESS_KEY_ID", default=None),
|
|
43
43
|
aws_secret_access_key=env("AWS_SECRET_KEY", default=None),
|
|
@@ -73,8 +73,9 @@ def generate_signed_asset_url(key: str) -> str:
|
|
|
73
73
|
|
|
74
74
|
return str(
|
|
75
75
|
client.generate_presigned_url(
|
|
76
|
-
"get_object",
|
|
77
|
-
|
|
76
|
+
"get_object",
|
|
77
|
+
Params={"Bucket": bucket, "Key": key},
|
|
78
|
+
),
|
|
78
79
|
)
|
|
79
80
|
|
|
80
81
|
|
|
@@ -113,9 +114,7 @@ def delete_from_bucket(uri: str, bucket: str) -> None:
|
|
|
113
114
|
response = client.list_objects(Bucket=bucket, Prefix=uri)
|
|
114
115
|
|
|
115
116
|
if response.get("Contents"):
|
|
116
|
-
objects_to_delete: list[ObjectIdentifierTypeDef] = [
|
|
117
|
-
{"Key": obj["Key"]} for obj in response.get("Contents", [])
|
|
118
|
-
]
|
|
117
|
+
objects_to_delete: list[ObjectIdentifierTypeDef] = [{"Key": obj["Key"]} for obj in response.get("Contents", [])]
|
|
119
118
|
client.delete_objects(
|
|
120
119
|
Bucket=bucket,
|
|
121
120
|
Delete={
|
|
@@ -147,7 +146,7 @@ def publish_documents(uri: str) -> None:
|
|
|
147
146
|
client.copy(source, public_bucket, key, extra_args)
|
|
148
147
|
except botocore.client.ClientError as e:
|
|
149
148
|
logging.warning(
|
|
150
|
-
f"Unable to copy file {key} to new location {public_bucket}, error: {e}"
|
|
149
|
+
f"Unable to copy file {key} to new location {public_bucket}, error: {e}",
|
|
151
150
|
)
|
|
152
151
|
|
|
153
152
|
|
|
@@ -207,7 +206,7 @@ def copy_assets(old_uri: str, new_uri: str) -> None:
|
|
|
207
206
|
client.copy(source, bucket, new_key)
|
|
208
207
|
except botocore.client.ClientError as e:
|
|
209
208
|
logging.warning(
|
|
210
|
-
f"Unable to copy file {old_key} to new location {new_key}, error: {e}"
|
|
209
|
+
f"Unable to copy file {old_key} to new location {new_key}, error: {e}",
|
|
211
210
|
)
|
|
212
211
|
|
|
213
212
|
|
|
@@ -236,9 +235,7 @@ def request_parse(
|
|
|
236
235
|
message_to_send = {
|
|
237
236
|
"properties": {
|
|
238
237
|
"messageType": "uk.gov.nationalarchives.da.messages.request.courtdocument.parse.RequestCourtDocumentParse",
|
|
239
|
-
"timestamp": datetime.datetime.now(datetime.timezone.utc)
|
|
240
|
-
.isoformat()
|
|
241
|
-
.replace("+00:00", "Z"),
|
|
238
|
+
"timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat().replace("+00:00", "Z"),
|
|
242
239
|
"function": "fcl-judgment-parse-request",
|
|
243
240
|
"producer": "FCL",
|
|
244
241
|
"executionId": str(uuid.uuid4()),
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
|
|
3
|
+
from dateutil.parser import isoparse
|
|
4
|
+
from pytz import UTC, tzinfo
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def parse_string_date_as_utc(iso_string: str, timezone: tzinfo.BaseTzInfo) -> datetime:
|
|
8
|
+
"""iso_string might be aware or unaware:
|
|
9
|
+
ensure that it is converted to a UTC-aware datetime"""
|
|
10
|
+
|
|
11
|
+
mixed_date = isoparse(iso_string)
|
|
12
|
+
if not mixed_date.tzinfo:
|
|
13
|
+
# it is an unaware time
|
|
14
|
+
aware_date = timezone.localize(mixed_date)
|
|
15
|
+
else:
|
|
16
|
+
aware_date = mixed_date
|
|
17
|
+
|
|
18
|
+
# make UTC
|
|
19
|
+
utc_date = aware_date.astimezone(UTC)
|
|
20
|
+
return utc_date
|
|
@@ -36,15 +36,15 @@ def overwrite_document(
|
|
|
36
36
|
|
|
37
37
|
if new_uri == source_uri:
|
|
38
38
|
raise RuntimeError(
|
|
39
|
-
f"Attempted to overwrite document {source_uri} with itself, which is not permitted."
|
|
39
|
+
f"Attempted to overwrite document {source_uri} with itself, which is not permitted.",
|
|
40
40
|
)
|
|
41
41
|
if new_uri is None:
|
|
42
42
|
raise NeutralCitationToUriError(
|
|
43
|
-
f"Unable to form new URI for {source_uri} from neutral citation: {target_citation}"
|
|
43
|
+
f"Unable to form new URI for {source_uri} from neutral citation: {target_citation}",
|
|
44
44
|
)
|
|
45
45
|
if not api_client.document_exists(new_uri):
|
|
46
46
|
raise OverwriteJudgmentError(
|
|
47
|
-
f"The URI {new_uri} generated from {target_citation} does not already exist, so cannot be overwritten"
|
|
47
|
+
f"The URI {new_uri} generated from {target_citation} does not already exist, so cannot be overwritten",
|
|
48
48
|
)
|
|
49
49
|
old_doc = api_client.get_document_by_uri_or_404(source_uri)
|
|
50
50
|
try:
|
|
@@ -60,14 +60,14 @@ def overwrite_document(
|
|
|
60
60
|
api_client.set_judgment_this_uri(new_uri)
|
|
61
61
|
except MarklogicAPIError as e:
|
|
62
62
|
raise OverwriteJudgmentError(
|
|
63
|
-
f"Failure when attempting to copy judgment from {source_uri} to {new_uri}: {e}"
|
|
63
|
+
f"Failure when attempting to copy judgment from {source_uri} to {new_uri}: {e}",
|
|
64
64
|
)
|
|
65
65
|
|
|
66
66
|
try:
|
|
67
67
|
api_client.delete_judgment(source_uri)
|
|
68
68
|
except MarklogicAPIError as e:
|
|
69
69
|
raise OverwriteJudgmentError(
|
|
70
|
-
f"Failure when attempting to delete judgment from {source_uri}: {e}"
|
|
70
|
+
f"Failure when attempting to delete judgment from {source_uri}: {e}",
|
|
71
71
|
)
|
|
72
72
|
|
|
73
73
|
return new_uri
|
|
@@ -86,13 +86,13 @@ def update_document_uri(source_uri: str, target_citation: str, api_client: Any)
|
|
|
86
86
|
new_uri: Optional[str] = caselawutils.neutral_url(target_citation.strip())
|
|
87
87
|
if new_uri is None:
|
|
88
88
|
raise NeutralCitationToUriError(
|
|
89
|
-
f"Unable to form new URI for {source_uri} from neutral citation: {target_citation}"
|
|
89
|
+
f"Unable to form new URI for {source_uri} from neutral citation: {target_citation}",
|
|
90
90
|
)
|
|
91
91
|
|
|
92
92
|
if api_client.document_exists(new_uri):
|
|
93
93
|
raise MoveJudgmentError(
|
|
94
94
|
f"The URI {new_uri} generated from {target_citation} already exists, you cannot move this judgment to a"
|
|
95
|
-
f" pre-existing Neutral Citation Number."
|
|
95
|
+
f" pre-existing Neutral Citation Number.",
|
|
96
96
|
)
|
|
97
97
|
|
|
98
98
|
try:
|
|
@@ -102,14 +102,14 @@ def update_document_uri(source_uri: str, target_citation: str, api_client: Any)
|
|
|
102
102
|
api_client.set_judgment_this_uri(new_uri)
|
|
103
103
|
except MarklogicAPIError as e:
|
|
104
104
|
raise MoveJudgmentError(
|
|
105
|
-
f"Failure when attempting to copy judgment from {source_uri} to {new_uri}: {e}"
|
|
105
|
+
f"Failure when attempting to copy judgment from {source_uri} to {new_uri}: {e}",
|
|
106
106
|
)
|
|
107
107
|
|
|
108
108
|
try:
|
|
109
109
|
api_client.delete_judgment(source_uri)
|
|
110
110
|
except MarklogicAPIError as e:
|
|
111
111
|
raise MoveJudgmentError(
|
|
112
|
-
f"Failure when attempting to delete judgment from {source_uri}: {e}"
|
|
112
|
+
f"Failure when attempting to delete judgment from {source_uri}: {e}",
|
|
113
113
|
)
|
|
114
114
|
|
|
115
115
|
return new_uri
|
|
@@ -120,7 +120,8 @@ def set_metadata(old_uri: str, new_uri: str, api_client: Any) -> None:
|
|
|
120
120
|
source_name = api_client.get_property(old_uri, "source-name")
|
|
121
121
|
source_email = api_client.get_property(old_uri, "source-email")
|
|
122
122
|
transfer_consignment_reference = api_client.get_property(
|
|
123
|
-
old_uri,
|
|
123
|
+
old_uri,
|
|
124
|
+
"transfer-consignment-reference",
|
|
124
125
|
)
|
|
125
126
|
transfer_received_at = api_client.get_property(old_uri, "transfer-received-at")
|
|
126
127
|
for key, value in [
|
|
@@ -31,7 +31,7 @@ class SearchResponse:
|
|
|
31
31
|
:return: The total number of search results
|
|
32
32
|
"""
|
|
33
33
|
return str(
|
|
34
|
-
self.node.xpath("//search:response/@total", namespaces=self.NAMESPACES)[0]
|
|
34
|
+
self.node.xpath("//search:response/@total", namespaces=self.NAMESPACES)[0],
|
|
35
35
|
)
|
|
36
36
|
|
|
37
37
|
@property
|
|
@@ -42,7 +42,8 @@ class SearchResponse:
|
|
|
42
42
|
:return: The list of search results
|
|
43
43
|
"""
|
|
44
44
|
results = self.node.xpath(
|
|
45
|
-
"//search:response/search:result",
|
|
45
|
+
"//search:response/search:result",
|
|
46
|
+
namespaces=self.NAMESPACES,
|
|
46
47
|
)
|
|
47
48
|
return [SearchResult(result, self.client) for result in results]
|
|
48
49
|
|
|
@@ -58,7 +59,5 @@ class SearchResponse:
|
|
|
58
59
|
"//search:response/search:facet/search:facet-value",
|
|
59
60
|
namespaces={"search": "http://marklogic.com/appservices/search"},
|
|
60
61
|
)
|
|
61
|
-
facets_dictionary = {
|
|
62
|
-
result.attrib["name"]: result.attrib["count"] for result in results
|
|
63
|
-
}
|
|
62
|
+
facets_dictionary = {result.attrib["name"]: result.attrib["count"] for result in results}
|
|
64
63
|
return facets_dictionary
|
|
@@ -99,7 +99,8 @@ class SearchResultMetadata:
|
|
|
99
99
|
"""
|
|
100
100
|
|
|
101
101
|
return self._get_xpath_match_string(
|
|
102
|
-
"//editor-priority/text()",
|
|
102
|
+
"//editor-priority/text()",
|
|
103
|
+
EditorPriority.MEDIUM.value,
|
|
103
104
|
)
|
|
104
105
|
|
|
105
106
|
@property
|
|
@@ -109,7 +110,7 @@ class SearchResultMetadata:
|
|
|
109
110
|
"""
|
|
110
111
|
|
|
111
112
|
extracted_submission_datetime = self._get_xpath_match_string(
|
|
112
|
-
"//transfer-received-at/text()"
|
|
113
|
+
"//transfer-received-at/text()",
|
|
113
114
|
)
|
|
114
115
|
return (
|
|
115
116
|
datetime.strptime(extracted_submission_datetime, "%Y-%m-%dT%H:%M:%SZ")
|
|
@@ -164,7 +165,7 @@ class SearchResult:
|
|
|
164
165
|
"""
|
|
165
166
|
|
|
166
167
|
return DocumentURIString(
|
|
167
|
-
self._get_xpath_match_string("@uri").lstrip("/").split(".xml")[0]
|
|
168
|
+
self._get_xpath_match_string("@uri").lstrip("/").split(".xml")[0],
|
|
168
169
|
)
|
|
169
170
|
|
|
170
171
|
@property
|
|
@@ -174,7 +175,7 @@ class SearchResult:
|
|
|
174
175
|
"""
|
|
175
176
|
|
|
176
177
|
return self._get_xpath_match_string(
|
|
177
|
-
"search:extracted/uk:cite/text()"
|
|
178
|
+
"search:extracted/uk:cite/text()",
|
|
178
179
|
) or self._get_xpath_match_string("search:extracted/akn:neutralCitation/text()")
|
|
179
180
|
|
|
180
181
|
@property
|
|
@@ -195,7 +196,7 @@ class SearchResult:
|
|
|
195
196
|
court = None
|
|
196
197
|
court_code = self._get_xpath_match_string("search:extracted/uk:court/text()")
|
|
197
198
|
jurisdiction_code = self._get_xpath_match_string(
|
|
198
|
-
"search:extracted/uk:jurisdiction/text()"
|
|
199
|
+
"search:extracted/uk:jurisdiction/text()",
|
|
199
200
|
)
|
|
200
201
|
if jurisdiction_code:
|
|
201
202
|
court_code_with_jurisdiction = "%s/%s" % (court_code, jurisdiction_code)
|
|
@@ -204,7 +205,7 @@ class SearchResult:
|
|
|
204
205
|
except CourtNotFoundException:
|
|
205
206
|
logging.warning(
|
|
206
207
|
"Court not found with court code %s and jurisdiction code %s for judgment with NCN %s, falling back to court."
|
|
207
|
-
% (court_code, jurisdiction_code, self.neutral_citation)
|
|
208
|
+
% (court_code, jurisdiction_code, self.neutral_citation),
|
|
208
209
|
)
|
|
209
210
|
if court is None:
|
|
210
211
|
try:
|
|
@@ -212,7 +213,7 @@ class SearchResult:
|
|
|
212
213
|
except CourtNotFoundException:
|
|
213
214
|
logging.warning(
|
|
214
215
|
"Court not found with court code %s for judgment with NCN %s, returning None."
|
|
215
|
-
% (court_code, self.neutral_citation)
|
|
216
|
+
% (court_code, self.neutral_citation),
|
|
216
217
|
)
|
|
217
218
|
court = None
|
|
218
219
|
return court
|
|
@@ -224,13 +225,13 @@ class SearchResult:
|
|
|
224
225
|
"""
|
|
225
226
|
|
|
226
227
|
date_string = self._get_xpath_match_string(
|
|
227
|
-
"search:extracted/akn:FRBRdate[(@name='judgment' or @name='decision')]/@date"
|
|
228
|
+
"search:extracted/akn:FRBRdate[(@name='judgment' or @name='decision')]/@date",
|
|
228
229
|
)
|
|
229
230
|
try:
|
|
230
231
|
date = dateparser.parse(date_string)
|
|
231
232
|
except ParserError as e:
|
|
232
233
|
logging.warning(
|
|
233
|
-
f'Unable to parse document date "{date_string}". Full error: {e}'
|
|
234
|
+
f'Unable to parse document date "{date_string}". Full error: {e}',
|
|
234
235
|
)
|
|
235
236
|
date = None
|
|
236
237
|
return date
|
|
@@ -242,7 +243,7 @@ class SearchResult:
|
|
|
242
243
|
"""
|
|
243
244
|
|
|
244
245
|
return self._get_xpath_match_string(
|
|
245
|
-
"search:extracted/akn:FRBRdate[@name='transform']/@date"
|
|
246
|
+
"search:extracted/akn:FRBRdate[@name='transform']/@date",
|
|
246
247
|
)
|
|
247
248
|
|
|
248
249
|
@property
|
caselawclient/xml_helpers.py
CHANGED
|
@@ -14,7 +14,9 @@ def get_xpath_match_string(
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def get_xpath_match_strings(
|
|
17
|
-
node: etree._Element,
|
|
17
|
+
node: etree._Element,
|
|
18
|
+
path: str,
|
|
19
|
+
namespaces: Optional[Dict[str, str]] = None,
|
|
18
20
|
) -> list[str]:
|
|
19
21
|
kwargs = {"namespaces": namespaces} if namespaces else {}
|
|
20
22
|
return [str(x) for x in node.xpath(path, **kwargs)]
|
caselawclient/xml_tools.py
CHANGED
|
@@ -31,7 +31,7 @@ def get_element(
|
|
|
31
31
|
) -> Element:
|
|
32
32
|
logging.warning(
|
|
33
33
|
"XMLTools is deprecated and will be removed in later versions. "
|
|
34
|
-
"Use methods from MarklogicApiClient.Client instead."
|
|
34
|
+
"Use methods from MarklogicApiClient.Client instead.",
|
|
35
35
|
)
|
|
36
36
|
name = xml.find(
|
|
37
37
|
xpath,
|
|
@@ -58,7 +58,7 @@ def get_neutral_citation_name_value(xml: ElementTree) -> Optional[str]:
|
|
|
58
58
|
def get_judgment_date_element(xml: ElementTree) -> Element:
|
|
59
59
|
logging.warning(
|
|
60
60
|
"XMLTools is deprecated and will be removed in later versions. "
|
|
61
|
-
"Use methods from MarklogicApiClient.Client instead."
|
|
61
|
+
"Use methods from MarklogicApiClient.Client instead.",
|
|
62
62
|
)
|
|
63
63
|
name = xml.find(
|
|
64
64
|
".//akn:FRBRWork/akn:FRBRdate",
|
|
@@ -102,7 +102,7 @@ def get_metadata_name_value(xml: ElementTree) -> str:
|
|
|
102
102
|
def get_search_matches(element: ElementTree) -> List[str]:
|
|
103
103
|
logging.warning(
|
|
104
104
|
"XMLTools is deprecated and will be removed in later versions. "
|
|
105
|
-
"Use methods from MarklogicApiClient.Client instead."
|
|
105
|
+
"Use methods from MarklogicApiClient.Client instead.",
|
|
106
106
|
)
|
|
107
107
|
nodes = element.findall(".//search:match", namespaces=search_namespace)
|
|
108
108
|
results = []
|
|
@@ -115,14 +115,15 @@ def get_search_matches(element: ElementTree) -> List[str]:
|
|
|
115
115
|
def get_error_code(content_as_xml: Optional[str]) -> str:
|
|
116
116
|
logging.warning(
|
|
117
117
|
"XMLTools is deprecated and will be removed in later versions. "
|
|
118
|
-
"Use methods from MarklogicApiClient.Client instead."
|
|
118
|
+
"Use methods from MarklogicApiClient.Client instead.",
|
|
119
119
|
)
|
|
120
120
|
if not content_as_xml:
|
|
121
121
|
return "Unknown error, Marklogic returned a null or empty response"
|
|
122
122
|
try:
|
|
123
123
|
xml = fromstring(content_as_xml)
|
|
124
124
|
return xml.find(
|
|
125
|
-
"message-code",
|
|
125
|
+
"message-code",
|
|
126
|
+
namespaces={"": "http://marklogic.com/xdmp/error"},
|
|
126
127
|
).text # type: ignore
|
|
127
128
|
except (ParseError, TypeError, AttributeError):
|
|
128
129
|
return "Unknown error, Marklogic returned a null or empty response"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ds-caselaw-marklogic-api-client
|
|
3
|
-
Version: 23.0.1
|
|
3
|
+
Version: 24.0.0
|
|
4
4
|
Summary: An API client for interacting with the underlying data in Find Caselaw.
|
|
5
5
|
Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
|
|
6
6
|
Keywords: national archives,caselaw
|
|
@@ -11,7 +11,7 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.10
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
13
|
Requires-Dist: boto3 (>=1.26.112,<2.0.0)
|
|
14
|
-
Requires-Dist: certifi (>=
|
|
14
|
+
Requires-Dist: certifi (>=2024.7.4,<2024.8.0)
|
|
15
15
|
Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
|
|
16
16
|
Requires-Dist: django-environ (>=0.11.0,<0.12.0)
|
|
17
17
|
Requires-Dist: ds-caselaw-utils (>=1.4.1,<2.0.0)
|
|
@@ -20,10 +20,11 @@ Requires-Dist: lxml (>=5.0.0,<6.0.0)
|
|
|
20
20
|
Requires-Dist: memoization (>=0.4.0,<0.5.0)
|
|
21
21
|
Requires-Dist: mypy-boto3-s3 (>=1.26.104,<2.0.0)
|
|
22
22
|
Requires-Dist: mypy-boto3-sns (>=1.26.69,<2.0.0)
|
|
23
|
+
Requires-Dist: python-dateutil (>=2.9.0-post.0,<3.0.0)
|
|
24
|
+
Requires-Dist: pytz (>=2024.1,<2025.0)
|
|
23
25
|
Requires-Dist: requests (>=2.28.2,<3.0.0)
|
|
24
26
|
Requires-Dist: requests-toolbelt (>=0.10.1,<1.1.0)
|
|
25
27
|
Requires-Dist: typing-extensions (>=4.7.1,<5.0.0)
|
|
26
|
-
Requires-Dist: urllib3 (>=1.26.15,<2.0.0)
|
|
27
28
|
Description-Content-Type: text/markdown
|
|
28
29
|
|
|
29
30
|
# The National Archives: Find Case Law
|
|
@@ -82,13 +83,13 @@ to main alone will **not** trigger a release to PyPI.
|
|
|
82
83
|
To create a release:
|
|
83
84
|
|
|
84
85
|
0. Update the version number in `pyproject.toml`
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
using the created tag
|
|
86
|
+
1. Create a branch `release/v{major}.{minor}.{patch}`
|
|
87
|
+
2. Update `CHANGELOG.md` for the release
|
|
88
|
+
3. Commit and push
|
|
89
|
+
4. Open a PR from that branch to main
|
|
90
|
+
5. Get approval on the PR
|
|
91
|
+
6. Merge the PR to main and push
|
|
92
|
+
7. Tag the merge commit on `main` with `v{major}.{minor}.{patch}` and push the tag
|
|
93
|
+
8. Create a release in [GitHub releases](https://github.com/nationalarchives/ds-caselaw-custom-api-client/releases)
|
|
94
|
+
using the created tag
|
|
94
95
|
|