ds-caselaw-marklogic-api-client 39.2.1__py3-none-any.whl → 43.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- caselawclient/Client.py +51 -1
- caselawclient/client_helpers/__init__.py +0 -114
- caselawclient/factories.py +2 -0
- caselawclient/managers/__init__.py +0 -0
- caselawclient/managers/merge/__init__.py +51 -0
- caselawclient/managers/merge/checks.py +79 -0
- caselawclient/models/documents/__init__.py +70 -2
- caselawclient/models/documents/body.py +50 -1
- caselawclient/models/documents/versions.py +114 -0
- caselawclient/models/documents/xml.py +4 -1
- caselawclient/models/identifiers/__init__.py +4 -1
- caselawclient/models/identifiers/collection.py +2 -0
- caselawclient/models/utilities/aws.py +35 -1
- caselawclient/responses/search_result.py +11 -5
- caselawclient/search_parameters.py +4 -0
- caselawclient/types.py +43 -6
- caselawclient/xml_helpers.py +18 -2
- caselawclient/xquery/check_content_hash_unique_by_uri.xqy +15 -0
- caselawclient/xquery/set_datetime_property.xqy +11 -0
- caselawclient/xquery_type_dicts.py +12 -0
- {ds_caselaw_marklogic_api_client-39.2.1.dist-info → ds_caselaw_marklogic_api_client-43.1.0.dist-info}/METADATA +4 -5
- {ds_caselaw_marklogic_api_client-39.2.1.dist-info → ds_caselaw_marklogic_api_client-43.1.0.dist-info}/RECORD +24 -18
- {ds_caselaw_marklogic_api_client-39.2.1.dist-info → ds_caselaw_marklogic_api_client-43.1.0.dist-info}/LICENSE.md +0 -0
- {ds_caselaw_marklogic_api_client-39.2.1.dist-info → ds_caselaw_marklogic_api_client-43.1.0.dist-info}/WHEEL +0 -0
caselawclient/Client.py
CHANGED
|
@@ -11,6 +11,7 @@ from xml.etree.ElementTree import Element
|
|
|
11
11
|
|
|
12
12
|
import environ
|
|
13
13
|
import requests
|
|
14
|
+
from dateutil.parser import isoparse
|
|
14
15
|
from defusedxml import ElementTree
|
|
15
16
|
from defusedxml.ElementTree import ParseError, fromstring
|
|
16
17
|
from ds_caselaw_utils.types import NeutralCitationString
|
|
@@ -20,19 +21,20 @@ from requests.structures import CaseInsensitiveDict
|
|
|
20
21
|
from requests_toolbelt.multipart import decoder
|
|
21
22
|
|
|
22
23
|
from caselawclient import xquery_type_dicts as query_dicts
|
|
23
|
-
from caselawclient.client_helpers import VersionAnnotation
|
|
24
24
|
from caselawclient.identifier_resolution import IdentifierResolutions
|
|
25
25
|
from caselawclient.models.documents import (
|
|
26
26
|
DOCUMENT_COLLECTION_URI_JUDGMENT,
|
|
27
27
|
DOCUMENT_COLLECTION_URI_PRESS_SUMMARY,
|
|
28
28
|
Document,
|
|
29
29
|
)
|
|
30
|
+
from caselawclient.models.documents.versions import VersionAnnotation
|
|
30
31
|
from caselawclient.models.judgments import Judgment
|
|
31
32
|
from caselawclient.models.press_summaries import PressSummary
|
|
32
33
|
from caselawclient.models.utilities import move
|
|
33
34
|
from caselawclient.search_parameters import SearchParameters
|
|
34
35
|
from caselawclient.types import DocumentIdentifierSlug, DocumentIdentifierValue, DocumentURIString
|
|
35
36
|
from caselawclient.xquery_type_dicts import (
|
|
37
|
+
CheckContentHashUniqueByUriDict,
|
|
36
38
|
MarkLogicDocumentURIString,
|
|
37
39
|
MarkLogicDocumentVersionURIString,
|
|
38
40
|
MarkLogicPrivilegeURIString,
|
|
@@ -727,6 +729,14 @@ class MarklogicApiClient:
|
|
|
727
729
|
== 0
|
|
728
730
|
)
|
|
729
731
|
|
|
732
|
+
def has_unique_content_hash(self, judgment_uri: DocumentURIString) -> bool:
|
|
733
|
+
"""
|
|
734
|
+
Returns True if the content hash for this document is unique (not shared with other documents).
|
|
735
|
+
"""
|
|
736
|
+
uri = self._format_uri_for_marklogic(judgment_uri)
|
|
737
|
+
vars: CheckContentHashUniqueByUriDict = {"uri": uri}
|
|
738
|
+
return self._eval_and_decode(vars, "check_content_hash_unique_by_uri.xqy") == "true"
|
|
739
|
+
|
|
730
740
|
def eval(
|
|
731
741
|
self,
|
|
732
742
|
xquery_path: str,
|
|
@@ -792,6 +802,8 @@ class MarklogicApiClient:
|
|
|
792
802
|
:param judge:
|
|
793
803
|
:param party:
|
|
794
804
|
:param neutral_citation:
|
|
805
|
+
:param document_name:
|
|
806
|
+
:param consignment_number:
|
|
795
807
|
:param specific_keyword:
|
|
796
808
|
:param order:
|
|
797
809
|
:param date_from:
|
|
@@ -948,12 +960,50 @@ class MarklogicApiClient:
|
|
|
948
960
|
"value": string_value,
|
|
949
961
|
"name": name,
|
|
950
962
|
}
|
|
963
|
+
"""
|
|
964
|
+
Set a property within MarkLogic which is specifically a boolean.
|
|
965
|
+
|
|
966
|
+
Since XML has no concept of boolean, the actual value in the database is set to `"true"` or `"false"`.
|
|
967
|
+
"""
|
|
951
968
|
return self._send_to_eval(vars, "set_boolean_property.xqy")
|
|
952
969
|
|
|
953
970
|
def get_boolean_property(self, judgment_uri: DocumentURIString, name: str) -> bool:
|
|
971
|
+
"""
|
|
972
|
+
Get a property from MarkLogic which is specifically a boolean.
|
|
973
|
+
|
|
974
|
+
:return: `True` if the property exists and has a value of `"true"`, otherwise `False`
|
|
975
|
+
"""
|
|
954
976
|
content = self.get_property(judgment_uri, name)
|
|
955
977
|
return content == "true"
|
|
956
978
|
|
|
979
|
+
def set_datetime_property(
|
|
980
|
+
self,
|
|
981
|
+
judgment_uri: DocumentURIString,
|
|
982
|
+
name: str,
|
|
983
|
+
value: datetime,
|
|
984
|
+
) -> requests.Response:
|
|
985
|
+
"""Set a property within MarkLogic which is specifically a datetime."""
|
|
986
|
+
uri = self._format_uri_for_marklogic(judgment_uri)
|
|
987
|
+
vars: query_dicts.SetDatetimePropertyDict = {
|
|
988
|
+
"uri": uri,
|
|
989
|
+
"value": value.isoformat(),
|
|
990
|
+
"name": name,
|
|
991
|
+
}
|
|
992
|
+
return self._send_to_eval(vars, "set_datetime_property.xqy")
|
|
993
|
+
|
|
994
|
+
def get_datetime_property(self, judgment_uri: DocumentURIString, name: str) -> Optional[datetime]:
|
|
995
|
+
"""
|
|
996
|
+
Get a property from MarkLogic which is specifically a datetime.
|
|
997
|
+
|
|
998
|
+
:return: A datetime with the value of the property, or `None` if it does not exist
|
|
999
|
+
"""
|
|
1000
|
+
content = self.get_property(judgment_uri, name)
|
|
1001
|
+
|
|
1002
|
+
if content:
|
|
1003
|
+
return isoparse(content)
|
|
1004
|
+
|
|
1005
|
+
return None
|
|
1006
|
+
|
|
957
1007
|
def set_published(
|
|
958
1008
|
self,
|
|
959
1009
|
judgment_uri: DocumentURIString,
|
|
@@ -1,9 +1,4 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from enum import Enum
|
|
3
|
-
from typing import Any, Optional, TypedDict
|
|
4
|
-
|
|
5
1
|
from lxml import etree
|
|
6
|
-
from typing_extensions import NotRequired
|
|
7
2
|
|
|
8
3
|
from caselawclient.xml_helpers import DEFAULT_NAMESPACES
|
|
9
4
|
|
|
@@ -17,115 +12,6 @@ class CannotDetermineDocumentType(Exception):
|
|
|
17
12
|
pass
|
|
18
13
|
|
|
19
14
|
|
|
20
|
-
class AnnotationDataDict(TypedDict):
|
|
21
|
-
type: str
|
|
22
|
-
calling_function: str
|
|
23
|
-
calling_agent: str
|
|
24
|
-
message: NotRequired[str]
|
|
25
|
-
payload: NotRequired[dict[str, Any]]
|
|
26
|
-
automated: bool
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class VersionType(Enum):
|
|
30
|
-
"""Valid types of version."""
|
|
31
|
-
|
|
32
|
-
SUBMISSION = "submission"
|
|
33
|
-
""" This version has been created as a result of a submission of a new document. """
|
|
34
|
-
|
|
35
|
-
ENRICHMENT = "enrichment"
|
|
36
|
-
""" This version has been created through an enrichment process. """
|
|
37
|
-
|
|
38
|
-
EDIT = "edit"
|
|
39
|
-
""" This version has been created as the result of a manual edit. """
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
class VersionAnnotation:
|
|
43
|
-
"""A class holding structured data about the reason for a version."""
|
|
44
|
-
|
|
45
|
-
def __init__(
|
|
46
|
-
self,
|
|
47
|
-
version_type: VersionType,
|
|
48
|
-
automated: bool,
|
|
49
|
-
message: Optional[str] = None,
|
|
50
|
-
payload: Optional[dict[str, Any]] = None,
|
|
51
|
-
):
|
|
52
|
-
"""
|
|
53
|
-
:param version_type: The type of version being created
|
|
54
|
-
:param automated: `True` if this action has happened as the result of an automated process, rather than a human
|
|
55
|
-
action
|
|
56
|
-
:param message: A human-readable string containing information about the version which can't be expressed in the
|
|
57
|
-
structured data.
|
|
58
|
-
:param payload: A dict containing additional information relevant to this version change
|
|
59
|
-
"""
|
|
60
|
-
self.version_type = version_type
|
|
61
|
-
self.automated = automated
|
|
62
|
-
self.message = message
|
|
63
|
-
self.payload = payload
|
|
64
|
-
|
|
65
|
-
self.calling_function: Optional[str] = None
|
|
66
|
-
self.calling_agent: Optional[str] = None
|
|
67
|
-
|
|
68
|
-
def set_calling_function(self, calling_function: str) -> None:
|
|
69
|
-
"""
|
|
70
|
-
Set the name of the calling function for tracing purposes
|
|
71
|
-
|
|
72
|
-
:param calling_function: The name of the function which is performing the database write
|
|
73
|
-
"""
|
|
74
|
-
self.calling_function = calling_function
|
|
75
|
-
|
|
76
|
-
def set_calling_agent(self, calling_agent: str) -> None:
|
|
77
|
-
"""
|
|
78
|
-
Set the name of the calling agent for tracing purposes
|
|
79
|
-
|
|
80
|
-
:param calling_agent: The name of the agent which is performing the database write
|
|
81
|
-
"""
|
|
82
|
-
self.calling_agent = calling_agent
|
|
83
|
-
|
|
84
|
-
@property
|
|
85
|
-
def structured_annotation_dict(self) -> AnnotationDataDict:
|
|
86
|
-
"""
|
|
87
|
-
:return: A structured dict representing this `VersionAnnotation`
|
|
88
|
-
|
|
89
|
-
:raises AttributeError: The name of the calling function has not been set; use `set_calling_function()`
|
|
90
|
-
:raises AttributeError: The name of the calling agent has not been set; use `set_calling_agent()`
|
|
91
|
-
"""
|
|
92
|
-
if not self.calling_function:
|
|
93
|
-
raise AttributeError(
|
|
94
|
-
"The name of the calling function has not been set; use set_calling_function()",
|
|
95
|
-
)
|
|
96
|
-
|
|
97
|
-
if not self.calling_agent:
|
|
98
|
-
raise AttributeError(
|
|
99
|
-
"The name of the calling agent has not been set; use set_calling_agent()",
|
|
100
|
-
)
|
|
101
|
-
|
|
102
|
-
annotation_data: AnnotationDataDict = {
|
|
103
|
-
"type": self.version_type.value,
|
|
104
|
-
"calling_function": self.calling_function,
|
|
105
|
-
"calling_agent": self.calling_agent,
|
|
106
|
-
"automated": self.automated,
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
if self.message:
|
|
110
|
-
annotation_data["message"] = self.message
|
|
111
|
-
|
|
112
|
-
if self.payload:
|
|
113
|
-
annotation_data["payload"] = self.payload
|
|
114
|
-
|
|
115
|
-
return annotation_data
|
|
116
|
-
|
|
117
|
-
@property
|
|
118
|
-
def as_json(self) -> str:
|
|
119
|
-
"""Render the structured annotation data as JSON, so it can be stored in the MarkLogic dls:annotation field.
|
|
120
|
-
|
|
121
|
-
:return: A JSON string representing this `VersionAnnotation`"""
|
|
122
|
-
|
|
123
|
-
return json.dumps(self.structured_annotation_dict)
|
|
124
|
-
|
|
125
|
-
def __str__(self) -> str:
|
|
126
|
-
return self.as_json
|
|
127
|
-
|
|
128
|
-
|
|
129
15
|
def get_document_type_class(xml: bytes) -> type[Document]:
|
|
130
16
|
"""Attempt to get the type of the document based on the top-level structure of the XML document."""
|
|
131
17
|
|
caselawclient/factories.py
CHANGED
|
@@ -61,6 +61,8 @@ class DocumentFactory:
|
|
|
61
61
|
"source_name": "Example Uploader",
|
|
62
62
|
"source_email": "uploader@example.com",
|
|
63
63
|
"consignment_reference": "TDR-12345",
|
|
64
|
+
"first_published_datetime": None,
|
|
65
|
+
"has_ever_been_published": False,
|
|
64
66
|
"assigned_to": "",
|
|
65
67
|
"versions": [],
|
|
66
68
|
}
|
|
File without changes
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import caselawclient.managers.merge.checks as checks
|
|
2
|
+
from caselawclient.models.documents import Document
|
|
3
|
+
from caselawclient.types import SuccessFailureMessageTuple
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _combine_list_of_successfailure_results(
|
|
7
|
+
validations: list[SuccessFailureMessageTuple],
|
|
8
|
+
) -> SuccessFailureMessageTuple:
|
|
9
|
+
"""Given a list of SuccessFailureMessageTuples, combine the success/failure states and any messages into a single new object representing the overall success/failure state."""
|
|
10
|
+
success = True
|
|
11
|
+
messages: list[str] = []
|
|
12
|
+
|
|
13
|
+
for validation in validations:
|
|
14
|
+
if validation.success is False:
|
|
15
|
+
success = False
|
|
16
|
+
|
|
17
|
+
messages += validation.messages
|
|
18
|
+
|
|
19
|
+
return SuccessFailureMessageTuple(success, messages)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class MergeManager:
|
|
23
|
+
@classmethod
|
|
24
|
+
def check_document_is_safe_as_merge_source(cls, source_document: Document) -> SuccessFailureMessageTuple:
|
|
25
|
+
"""
|
|
26
|
+
Is the given document safe to be considered as a merge source?
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
return _combine_list_of_successfailure_results(
|
|
30
|
+
[
|
|
31
|
+
checks.check_document_is_not_version(source_document),
|
|
32
|
+
checks.check_document_has_only_one_version(source_document),
|
|
33
|
+
checks.check_document_has_never_been_published(source_document),
|
|
34
|
+
checks.check_document_is_safe_to_delete(source_document),
|
|
35
|
+
]
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
@classmethod
|
|
39
|
+
def check_source_document_is_safe_to_merge_into_target(
|
|
40
|
+
cls, source_document: Document, target_document: Document
|
|
41
|
+
) -> SuccessFailureMessageTuple:
|
|
42
|
+
"""Is the given source document safe to merge into a given target?"""
|
|
43
|
+
|
|
44
|
+
return _combine_list_of_successfailure_results(
|
|
45
|
+
[
|
|
46
|
+
checks.check_documents_are_not_same_document(source_document, target_document),
|
|
47
|
+
checks.check_document_is_not_version(target_document),
|
|
48
|
+
checks.check_documents_are_same_type(source_document, target_document),
|
|
49
|
+
checks.check_source_document_is_newer_than_target(source_document, target_document),
|
|
50
|
+
]
|
|
51
|
+
)
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from caselawclient.models.documents import Document
|
|
2
|
+
from caselawclient.types import SuccessFailureMessageTuple
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def check_document_is_not_version(document: Document) -> SuccessFailureMessageTuple:
|
|
6
|
+
"""Check that the document URI isn't a specific version"""
|
|
7
|
+
if document.is_version:
|
|
8
|
+
return SuccessFailureMessageTuple(
|
|
9
|
+
False,
|
|
10
|
+
["This document is a specific version, and cannot be used as a merge source"],
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
return SuccessFailureMessageTuple(True, [])
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def check_document_has_only_one_version(document: Document) -> SuccessFailureMessageTuple:
|
|
17
|
+
"""Make sure the document has exactly one version."""
|
|
18
|
+
if len(document.versions) > 1:
|
|
19
|
+
return SuccessFailureMessageTuple(
|
|
20
|
+
False,
|
|
21
|
+
["This document has more than one version"],
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
return SuccessFailureMessageTuple(True, [])
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def check_document_has_never_been_published(document: Document) -> SuccessFailureMessageTuple:
|
|
28
|
+
"""Make sure the document has never been published."""
|
|
29
|
+
if document.has_ever_been_published:
|
|
30
|
+
return SuccessFailureMessageTuple(
|
|
31
|
+
False,
|
|
32
|
+
["This document has previously been published"],
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
return SuccessFailureMessageTuple(True, [])
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def check_document_is_safe_to_delete(document: Document) -> SuccessFailureMessageTuple:
|
|
39
|
+
"""Make sure the document is safe to delete."""
|
|
40
|
+
if not document.safe_to_delete:
|
|
41
|
+
return SuccessFailureMessageTuple(
|
|
42
|
+
False,
|
|
43
|
+
["This document cannot be deleted because it is published"],
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
return SuccessFailureMessageTuple(True, [])
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def check_documents_are_not_same_document(document_one: Document, document_two: Document) -> SuccessFailureMessageTuple:
|
|
50
|
+
"""Check that two documents aren't actually the same"""
|
|
51
|
+
if document_one.uri == document_two.uri:
|
|
52
|
+
return SuccessFailureMessageTuple(
|
|
53
|
+
False,
|
|
54
|
+
["You cannot merge a document with itself"],
|
|
55
|
+
)
|
|
56
|
+
return SuccessFailureMessageTuple(True, [])
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def check_documents_are_same_type(document_one: Document, document_two: Document) -> SuccessFailureMessageTuple:
|
|
60
|
+
"""Check to see if this document is the same type as a target document."""
|
|
61
|
+
if type(document_one) is not type(document_two):
|
|
62
|
+
return SuccessFailureMessageTuple(
|
|
63
|
+
False,
|
|
64
|
+
[
|
|
65
|
+
f"The type of {document_one.uri} ({type(document_one).document_noun}) does not match the type of {document_two.uri} ({type(document_two).document_noun})"
|
|
66
|
+
],
|
|
67
|
+
)
|
|
68
|
+
return SuccessFailureMessageTuple(True, [])
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def check_source_document_is_newer_than_target(
|
|
72
|
+
source_document: Document, target_document: Document
|
|
73
|
+
) -> SuccessFailureMessageTuple:
|
|
74
|
+
"""Check to see if the created datetime of the latest version of this document is newer than the created datetime of the latest version of a target document."""
|
|
75
|
+
if source_document.version_created_datetime < target_document.version_created_datetime:
|
|
76
|
+
return SuccessFailureMessageTuple(
|
|
77
|
+
False, [f"The document at {source_document.uri} is older than the latest version of {target_document.uri}"]
|
|
78
|
+
)
|
|
79
|
+
return SuccessFailureMessageTuple(True, [])
|
|
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any, Optional
|
|
|
7
7
|
from ds_caselaw_utils import courts
|
|
8
8
|
from ds_caselaw_utils.courts import CourtNotFoundException
|
|
9
9
|
from ds_caselaw_utils.types import NeutralCitationString
|
|
10
|
+
from pydantic import TypeAdapter
|
|
10
11
|
from requests_toolbelt.multipart import decoder
|
|
11
12
|
|
|
12
13
|
import caselawclient.models.documents.comparison as comparison
|
|
@@ -16,6 +17,7 @@ from caselawclient.errors import (
|
|
|
16
17
|
OnlySupportedOnVersion,
|
|
17
18
|
)
|
|
18
19
|
from caselawclient.identifier_resolution import IdentifierResolutions
|
|
20
|
+
from caselawclient.models.documents.versions import AnnotationDataDict
|
|
19
21
|
from caselawclient.models.identifiers import Identifier
|
|
20
22
|
from caselawclient.models.identifiers.exceptions import IdentifierValidationException
|
|
21
23
|
from caselawclient.models.identifiers.fclid import FindCaseLawIdentifier, FindCaseLawIdentifierSchema
|
|
@@ -95,6 +97,11 @@ class Document:
|
|
|
95
97
|
True,
|
|
96
98
|
"The court for this {document_noun} is not valid",
|
|
97
99
|
),
|
|
100
|
+
(
|
|
101
|
+
"has_unique_content_hash",
|
|
102
|
+
True,
|
|
103
|
+
"There is another document with identical content",
|
|
104
|
+
),
|
|
98
105
|
]
|
|
99
106
|
"""
|
|
100
107
|
A list of tuples in the form:
|
|
@@ -305,6 +312,41 @@ class Document:
|
|
|
305
312
|
# An empty list (which is falsy) therefore means the judgment can be published safely.
|
|
306
313
|
return not self.validation_failure_messages
|
|
307
314
|
|
|
315
|
+
@cached_property
|
|
316
|
+
def first_published_datetime(self) -> Optional[datetime.datetime]:
|
|
317
|
+
"""
|
|
318
|
+
Return the database value for the date and time this document was first published.
|
|
319
|
+
|
|
320
|
+
:return: The datetime value in the database for "first published".
|
|
321
|
+
"""
|
|
322
|
+
return self.api_client.get_datetime_property(self.uri, "first_published_datetime")
|
|
323
|
+
|
|
324
|
+
@cached_property
|
|
325
|
+
def first_published_datetime_display(self) -> Optional[datetime.datetime]:
|
|
326
|
+
"""
|
|
327
|
+
Return the display value for the date and time this document was first published.
|
|
328
|
+
|
|
329
|
+
A value of 1970-01-01 00:00 indicates that the document has been published previously, but the exact date and time is unknown. In this case, return `None`. This can be used alongside `has_ever_been_published` to indicate an "unknown" state.
|
|
330
|
+
|
|
331
|
+
:return: The datetime value to be displayed to end users for "first published".
|
|
332
|
+
"""
|
|
333
|
+
|
|
334
|
+
if self.first_published_datetime == datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc):
|
|
335
|
+
return None
|
|
336
|
+
|
|
337
|
+
return self.first_published_datetime
|
|
338
|
+
|
|
339
|
+
@cached_property
|
|
340
|
+
def has_ever_been_published(self) -> bool:
|
|
341
|
+
"""
|
|
342
|
+
Do we consider this document to have ever been published?
|
|
343
|
+
|
|
344
|
+
This is `True` if either the document is currently published, or if `first_published_datetime` has any value (including the sentinel value).
|
|
345
|
+
|
|
346
|
+
:return: A boolean indicating if the document has ever been published.
|
|
347
|
+
"""
|
|
348
|
+
return self.is_published or self.first_published_datetime is not None
|
|
349
|
+
|
|
308
350
|
@cached_property
|
|
309
351
|
def validation_failure_messages(self) -> list[str]:
|
|
310
352
|
exception_list = []
|
|
@@ -317,6 +359,17 @@ class Document:
|
|
|
317
359
|
def annotation(self) -> str:
|
|
318
360
|
return self.api_client.get_version_annotation(self.uri)
|
|
319
361
|
|
|
362
|
+
@cached_property
|
|
363
|
+
def structured_annotation(self) -> AnnotationDataDict:
|
|
364
|
+
annotation_data_dict_loader = TypeAdapter(AnnotationDataDict)
|
|
365
|
+
|
|
366
|
+
return annotation_data_dict_loader.validate_json(self.annotation)
|
|
367
|
+
|
|
368
|
+
@cached_property
|
|
369
|
+
def has_unique_content_hash(self) -> bool:
|
|
370
|
+
"""Check if the content hash of this document is unique compared to all other documents in MarkLogic."""
|
|
371
|
+
return self.api_client.has_unique_content_hash(self.uri)
|
|
372
|
+
|
|
320
373
|
@cached_property
|
|
321
374
|
def version_created_datetime(self) -> datetime.datetime:
|
|
322
375
|
return self.api_client.get_version_created_datetime(self.uri)
|
|
@@ -407,6 +460,8 @@ class Document:
|
|
|
407
460
|
|
|
408
461
|
def publish(self) -> None:
|
|
409
462
|
"""
|
|
463
|
+
Assuming that a document passes pre-publish checks, perform all necessary operations to put it into a published state.
|
|
464
|
+
|
|
410
465
|
:raises CannotPublishUnpublishableDocument: This document has not passed the checks in `is_publishable`, and as
|
|
411
466
|
such cannot be published.
|
|
412
467
|
"""
|
|
@@ -416,12 +471,25 @@ class Document:
|
|
|
416
471
|
## Make sure the document has an FCLID
|
|
417
472
|
self.assign_fclid_if_missing()
|
|
418
473
|
|
|
474
|
+
## Copy the document assets into the appropriate place in S3
|
|
419
475
|
publish_documents(self.uri)
|
|
476
|
+
|
|
477
|
+
## Set the fact the document is published
|
|
420
478
|
self.api_client.set_published(self.uri, True)
|
|
479
|
+
|
|
480
|
+
## If necessary, set the first published date
|
|
481
|
+
if not self.first_published_datetime:
|
|
482
|
+
self.api_client.set_datetime_property(
|
|
483
|
+
self.uri, "first_published_datetime", datetime.datetime.now(datetime.timezone.utc)
|
|
484
|
+
)
|
|
485
|
+
|
|
486
|
+
## Announce the publication on the event bus
|
|
421
487
|
announce_document_event(
|
|
422
488
|
uri=self.uri,
|
|
423
489
|
status="publish",
|
|
424
490
|
)
|
|
491
|
+
|
|
492
|
+
## Send the document off for enrichment, but accept if we can't for any reason
|
|
425
493
|
self.enrich(accept_failures=True)
|
|
426
494
|
|
|
427
495
|
def unpublish(self) -> None:
|
|
@@ -517,14 +585,14 @@ class Document:
|
|
|
517
585
|
"""
|
|
518
586
|
Is it sensible to reparse this document?
|
|
519
587
|
"""
|
|
520
|
-
return self.docx_exists()
|
|
588
|
+
return self.docx_exists() and not self.body.has_external_data
|
|
521
589
|
|
|
522
590
|
@cached_property
|
|
523
591
|
def can_enrich(self) -> bool:
|
|
524
592
|
"""
|
|
525
593
|
Is it possible to enrich this document?
|
|
526
594
|
"""
|
|
527
|
-
return self.body.has_content
|
|
595
|
+
return self.body.has_content and not self.body.has_external_data
|
|
528
596
|
|
|
529
597
|
def validate_identifiers(self) -> SuccessFailureMessageTuple:
|
|
530
598
|
return self.identifiers.perform_all_validations(document_type=type(self), api_client=self.api_client)
|
|
@@ -6,9 +6,11 @@ from typing import Optional
|
|
|
6
6
|
|
|
7
7
|
import pytz
|
|
8
8
|
from ds_caselaw_utils.types import CourtCode
|
|
9
|
+
from lxml import etree
|
|
9
10
|
from saxonche import PySaxonProcessor
|
|
10
11
|
|
|
11
12
|
from caselawclient.models.utilities.dates import parse_string_date_as_utc
|
|
13
|
+
from caselawclient.types import DocumentCategory
|
|
12
14
|
|
|
13
15
|
from .xml import XML
|
|
14
16
|
|
|
@@ -37,6 +39,9 @@ class DocumentBody:
|
|
|
37
39
|
def get_xpath_match_strings(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[str]:
|
|
38
40
|
return self._xml.get_xpath_match_strings(xpath, namespaces)
|
|
39
41
|
|
|
42
|
+
def get_xpath_nodes(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[etree._Element]:
|
|
43
|
+
return self._xml.get_xpath_nodes(xpath, namespaces)
|
|
44
|
+
|
|
40
45
|
@cached_property
|
|
41
46
|
def name(self) -> str:
|
|
42
47
|
return self.get_xpath_match_string(
|
|
@@ -51,9 +56,46 @@ class DocumentBody:
|
|
|
51
56
|
def jurisdiction(self) -> str:
|
|
52
57
|
return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()")
|
|
53
58
|
|
|
59
|
+
@cached_property
|
|
60
|
+
def categories(self) -> list[DocumentCategory]:
|
|
61
|
+
xpath = "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category"
|
|
62
|
+
nodes = self.get_xpath_nodes(xpath, DEFAULT_NAMESPACES)
|
|
63
|
+
|
|
64
|
+
categories: dict[str, DocumentCategory] = {}
|
|
65
|
+
children_map: dict[str, list[DocumentCategory]] = {}
|
|
66
|
+
|
|
67
|
+
for node in nodes:
|
|
68
|
+
name = node.text
|
|
69
|
+
if name is None or not name.strip():
|
|
70
|
+
continue
|
|
71
|
+
|
|
72
|
+
category = DocumentCategory(name=name)
|
|
73
|
+
categories[name] = category
|
|
74
|
+
|
|
75
|
+
parent = node.get("parent")
|
|
76
|
+
|
|
77
|
+
if parent:
|
|
78
|
+
children_map.setdefault(parent, []).append(category)
|
|
79
|
+
|
|
80
|
+
for parent, subcategories in children_map.items():
|
|
81
|
+
if parent in categories:
|
|
82
|
+
categories[parent].subcategories.extend(subcategories)
|
|
83
|
+
|
|
84
|
+
top_level_categories = [
|
|
85
|
+
categories[name]
|
|
86
|
+
for node in nodes
|
|
87
|
+
if node.get("parent") is None
|
|
88
|
+
if (name := node.text) and name in categories
|
|
89
|
+
]
|
|
90
|
+
|
|
91
|
+
return top_level_categories
|
|
92
|
+
|
|
93
|
+
# NOTE: Deprecated - use categories function
|
|
54
94
|
@cached_property
|
|
55
95
|
def category(self) -> Optional[str]:
|
|
56
|
-
return self.get_xpath_match_string(
|
|
96
|
+
return self.get_xpath_match_string(
|
|
97
|
+
"/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category[not(@parent)][1]/text()"
|
|
98
|
+
)
|
|
57
99
|
|
|
58
100
|
@cached_property
|
|
59
101
|
def case_number(self) -> Optional[str]:
|
|
@@ -144,6 +186,13 @@ class DocumentBody:
|
|
|
144
186
|
|
|
145
187
|
return False
|
|
146
188
|
|
|
189
|
+
@cached_property
|
|
190
|
+
def has_external_data(self) -> bool:
|
|
191
|
+
"""Is there data which is not present within the source document:
|
|
192
|
+
is there a spreadsheet which has populated some fields. The current implementation
|
|
193
|
+
"is there a uk:party tag" is intended as a stopgap whilst we're not importing that data."""
|
|
194
|
+
return bool(self._xml.xml_as_tree.xpath("//uk:party", namespaces=DEFAULT_NAMESPACES))
|
|
195
|
+
|
|
147
196
|
@cache
|
|
148
197
|
def content_html(self, image_prefix: str) -> Optional[str]:
|
|
149
198
|
"""Convert the XML representation of the Document into HTML for rendering."""
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from typing import Any, Optional, TypedDict
|
|
4
|
+
|
|
5
|
+
from typing_extensions import NotRequired
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AnnotationDataDict(TypedDict):
|
|
9
|
+
type: str
|
|
10
|
+
calling_function: str
|
|
11
|
+
calling_agent: str
|
|
12
|
+
message: NotRequired[str]
|
|
13
|
+
payload: NotRequired[dict[str, Any]]
|
|
14
|
+
automated: bool
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class VersionType(Enum):
|
|
18
|
+
"""Valid types of version."""
|
|
19
|
+
|
|
20
|
+
SUBMISSION = "submission"
|
|
21
|
+
""" This version has been created as a result of a submission of a new document. """
|
|
22
|
+
|
|
23
|
+
ENRICHMENT = "enrichment"
|
|
24
|
+
""" This version has been created through an enrichment process. """
|
|
25
|
+
|
|
26
|
+
EDIT = "edit"
|
|
27
|
+
""" This version has been created as the result of a manual edit. """
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class VersionAnnotation:
|
|
31
|
+
"""A class holding structured data about the reason for a version."""
|
|
32
|
+
|
|
33
|
+
def __init__(
|
|
34
|
+
self,
|
|
35
|
+
version_type: VersionType,
|
|
36
|
+
automated: bool,
|
|
37
|
+
message: Optional[str] = None,
|
|
38
|
+
payload: Optional[dict[str, Any]] = None,
|
|
39
|
+
):
|
|
40
|
+
"""
|
|
41
|
+
:param version_type: The type of version being created
|
|
42
|
+
:param automated: `True` if this action has happened as the result of an automated process, rather than a human
|
|
43
|
+
action
|
|
44
|
+
:param message: A human-readable string containing information about the version which can't be expressed in the
|
|
45
|
+
structured data.
|
|
46
|
+
:param payload: A dict containing additional information relevant to this version change
|
|
47
|
+
"""
|
|
48
|
+
self.version_type = version_type
|
|
49
|
+
self.automated = automated
|
|
50
|
+
self.message = message
|
|
51
|
+
self.payload = payload
|
|
52
|
+
|
|
53
|
+
self.calling_function: Optional[str] = None
|
|
54
|
+
self.calling_agent: Optional[str] = None
|
|
55
|
+
|
|
56
|
+
def set_calling_function(self, calling_function: str) -> None:
|
|
57
|
+
"""
|
|
58
|
+
Set the name of the calling function for tracing purposes
|
|
59
|
+
|
|
60
|
+
:param calling_function: The name of the function which is performing the database write
|
|
61
|
+
"""
|
|
62
|
+
self.calling_function = calling_function
|
|
63
|
+
|
|
64
|
+
def set_calling_agent(self, calling_agent: str) -> None:
|
|
65
|
+
"""
|
|
66
|
+
Set the name of the calling agent for tracing purposes
|
|
67
|
+
|
|
68
|
+
:param calling_agent: The name of the agent which is performing the database write
|
|
69
|
+
"""
|
|
70
|
+
self.calling_agent = calling_agent
|
|
71
|
+
|
|
72
|
+
@property
|
|
73
|
+
def structured_annotation_dict(self) -> AnnotationDataDict:
|
|
74
|
+
"""
|
|
75
|
+
:return: A structured dict representing this `VersionAnnotation`
|
|
76
|
+
|
|
77
|
+
:raises AttributeError: The name of the calling function has not been set; use `set_calling_function()`
|
|
78
|
+
:raises AttributeError: The name of the calling agent has not been set; use `set_calling_agent()`
|
|
79
|
+
"""
|
|
80
|
+
if not self.calling_function:
|
|
81
|
+
raise AttributeError(
|
|
82
|
+
"The name of the calling function has not been set; use set_calling_function()",
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
if not self.calling_agent:
|
|
86
|
+
raise AttributeError(
|
|
87
|
+
"The name of the calling agent has not been set; use set_calling_agent()",
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
annotation_data: AnnotationDataDict = {
|
|
91
|
+
"type": self.version_type.value,
|
|
92
|
+
"calling_function": self.calling_function,
|
|
93
|
+
"calling_agent": self.calling_agent,
|
|
94
|
+
"automated": self.automated,
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
if self.message:
|
|
98
|
+
annotation_data["message"] = self.message
|
|
99
|
+
|
|
100
|
+
if self.payload:
|
|
101
|
+
annotation_data["payload"] = self.payload
|
|
102
|
+
|
|
103
|
+
return annotation_data
|
|
104
|
+
|
|
105
|
+
@property
|
|
106
|
+
def as_json(self) -> str:
|
|
107
|
+
"""Render the structured annotation data as JSON, so it can be stored in the MarkLogic dls:annotation field.
|
|
108
|
+
|
|
109
|
+
:return: A JSON string representing this `VersionAnnotation`"""
|
|
110
|
+
|
|
111
|
+
return json.dumps(self.structured_annotation_dict)
|
|
112
|
+
|
|
113
|
+
def __str__(self) -> str:
|
|
114
|
+
return self.as_json
|
|
@@ -2,7 +2,7 @@ import os
|
|
|
2
2
|
|
|
3
3
|
from lxml import etree
|
|
4
4
|
|
|
5
|
-
from caselawclient.xml_helpers import get_xpath_match_string, get_xpath_match_strings
|
|
5
|
+
from caselawclient.xml_helpers import get_xpath_match_string, get_xpath_match_strings, get_xpath_nodes
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def _xslt_path(xslt_file_name: str) -> str:
|
|
@@ -50,6 +50,9 @@ class XML:
|
|
|
50
50
|
) -> list[str]:
|
|
51
51
|
return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
|
|
52
52
|
|
|
53
|
+
def get_xpath_nodes(self, xpath: str, namespaces: dict[str, str]) -> list[etree._Element]:
|
|
54
|
+
return get_xpath_nodes(self.xml_as_tree, xpath, namespaces)
|
|
55
|
+
|
|
53
56
|
def _modified(
|
|
54
57
|
self,
|
|
55
58
|
xslt: str,
|
|
@@ -46,7 +46,10 @@ class IdentifierSchema(ABC):
|
|
|
46
46
|
""" Should editors be allowed to manually manipulate identifiers under this schema? """
|
|
47
47
|
|
|
48
48
|
require_globally_unique: bool = True
|
|
49
|
-
""" Must this identifier be globally unique? """
|
|
49
|
+
""" Must this identifier be globally unique? (appear on no other documents) """
|
|
50
|
+
|
|
51
|
+
allow_multiple: bool = False
|
|
52
|
+
""" May documents have more than one non-deprecated identifier of this type? """
|
|
50
53
|
|
|
51
54
|
document_types: Optional[list[str]] = None
|
|
52
55
|
"""
|
|
@@ -43,6 +43,8 @@ class IdentifiersCollection(dict[str, Identifier]):
|
|
|
43
43
|
"""Check that only one non-deprecated identifier exists per schema where that schema does not allow multiples."""
|
|
44
44
|
|
|
45
45
|
for schema, identifiers in self._list_all_identifiers_by_schema().items():
|
|
46
|
+
if schema.allow_multiple:
|
|
47
|
+
continue
|
|
46
48
|
non_deprecated_identifiers = [i for i in identifiers if not i.deprecated]
|
|
47
49
|
if len(non_deprecated_identifiers) > 1:
|
|
48
50
|
return SuccessFailureMessageTuple(
|
|
@@ -2,6 +2,7 @@ import datetime
|
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
4
|
import uuid
|
|
5
|
+
from collections.abc import Callable
|
|
5
6
|
from typing import Any, Literal, Optional, TypedDict, overload
|
|
6
7
|
|
|
7
8
|
import boto3
|
|
@@ -118,11 +119,20 @@ def generate_pdf_url(uri: DocumentURIString) -> str:
|
|
|
118
119
|
|
|
119
120
|
|
|
120
121
|
def delete_from_bucket(uri: DocumentURIString, bucket: str) -> None:
|
|
122
|
+
delete_some_from_bucket(uri=uri, bucket=bucket, filter=lambda x: True)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def delete_some_from_bucket(
|
|
126
|
+
uri: DocumentURIString, bucket: str, filter: Callable[[ObjectIdentifierTypeDef], bool]
|
|
127
|
+
) -> None:
|
|
121
128
|
client = create_s3_client()
|
|
122
129
|
response = client.list_objects(Bucket=bucket, Prefix=uri_for_s3(uri))
|
|
123
130
|
|
|
124
131
|
if response.get("Contents"):
|
|
125
|
-
|
|
132
|
+
objects_to_maybe_delete: list[ObjectIdentifierTypeDef] = [
|
|
133
|
+
{"Key": obj["Key"]} for obj in response.get("Contents", [])
|
|
134
|
+
]
|
|
135
|
+
objects_to_delete = [obj for obj in objects_to_maybe_delete if filter(obj)]
|
|
126
136
|
client.delete_objects(
|
|
127
137
|
Bucket=bucket,
|
|
128
138
|
Delete={
|
|
@@ -131,6 +141,10 @@ def delete_from_bucket(uri: DocumentURIString, bucket: str) -> None:
|
|
|
131
141
|
)
|
|
132
142
|
|
|
133
143
|
|
|
144
|
+
def delete_non_targz_from_bucket(uri: DocumentURIString, bucket: str) -> None:
|
|
145
|
+
delete_some_from_bucket(uri=uri, bucket=bucket, filter=lambda x: not x["Key"].endswith(".tar.gz"))
|
|
146
|
+
|
|
147
|
+
|
|
134
148
|
def publish_documents(uri: DocumentURIString) -> None:
|
|
135
149
|
"""
|
|
136
150
|
Copy assets from the unpublished bucket to the published one.
|
|
@@ -217,6 +231,26 @@ def copy_assets(old_uri: DocumentURIString, new_uri: DocumentURIString) -> None:
|
|
|
217
231
|
)
|
|
218
232
|
|
|
219
233
|
|
|
234
|
+
def are_unpublished_assets_clean(uri: DocumentURIString) -> bool:
|
|
235
|
+
"""Returns true if all non-tar.gz assets in the relevant S3 bucket have been cleaned
|
|
236
|
+
(they have a DOCUMENT_PROCESSOR_VERSION tag)
|
|
237
|
+
Note: if there are no assets, then this returns true."""
|
|
238
|
+
client = create_s3_client()
|
|
239
|
+
bucket = env("PRIVATE_ASSET_BUCKET")
|
|
240
|
+
response = client.list_objects(Bucket=bucket, Prefix=uri_for_s3(uri))
|
|
241
|
+
for result in response.get("Contents", []):
|
|
242
|
+
file_key = str(result["Key"])
|
|
243
|
+
# ignore original tar.gz files
|
|
244
|
+
if file_key.endswith(".tar.gz"):
|
|
245
|
+
continue
|
|
246
|
+
|
|
247
|
+
# check if assets are tagged as being processed by S3
|
|
248
|
+
tag_response = client.get_object_tagging(Bucket=bucket, Key=file_key)
|
|
249
|
+
if not (any(tag["Key"] == "DOCUMENT_PROCESSOR_VERSION" for tag in tag_response["TagSet"])):
|
|
250
|
+
return False
|
|
251
|
+
return True
|
|
252
|
+
|
|
253
|
+
|
|
220
254
|
def build_new_key(old_key: str, new_uri: DocumentURIString) -> str:
|
|
221
255
|
"""Ensure that DOCX and PDF filenames are modified to reflect their new home
|
|
222
256
|
as we get the name of the new S3 path"""
|
|
@@ -13,6 +13,8 @@ from lxml import etree
|
|
|
13
13
|
|
|
14
14
|
from caselawclient.Client import MarklogicApiClient
|
|
15
15
|
from caselawclient.models.identifiers.collection import IdentifiersCollection
|
|
16
|
+
from caselawclient.models.identifiers.neutral_citation import NeutralCitationNumber
|
|
17
|
+
from caselawclient.models.identifiers.press_summary_ncn import PressSummaryRelatedNCNIdentifier
|
|
16
18
|
from caselawclient.models.identifiers.unpacker import unpack_all_identifiers_from_etree
|
|
17
19
|
from caselawclient.types import DocumentURIString
|
|
18
20
|
from caselawclient.xml_helpers import get_xpath_match_string
|
|
@@ -196,14 +198,18 @@ class SearchResult:
|
|
|
196
198
|
return str(preferred.url_slug)
|
|
197
199
|
|
|
198
200
|
@property
|
|
199
|
-
def neutral_citation(self) -> str:
|
|
201
|
+
def neutral_citation(self) -> Optional[str]:
|
|
200
202
|
"""
|
|
201
|
-
:return:
|
|
203
|
+
:return: If present, the value of preferred neutral citation of the document.
|
|
202
204
|
"""
|
|
203
205
|
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
206
|
+
preferred_ncn = self.identifiers.preferred(type=NeutralCitationNumber)
|
|
207
|
+
|
|
208
|
+
# If the result doesn't have a preferred NCN, maybe it has a preferred press summary related NCN?
|
|
209
|
+
if not preferred_ncn:
|
|
210
|
+
preferred_ncn = self.identifiers.preferred(type=PressSummaryRelatedNCNIdentifier)
|
|
211
|
+
|
|
212
|
+
return preferred_ncn.value if preferred_ncn else None
|
|
207
213
|
|
|
208
214
|
@property
|
|
209
215
|
def name(self) -> str:
|
|
@@ -15,6 +15,8 @@ class SearchParameters:
|
|
|
15
15
|
judge: Optional[str] = None
|
|
16
16
|
party: Optional[str] = None
|
|
17
17
|
neutral_citation: Optional[str] = None
|
|
18
|
+
document_name: Optional[str] = None
|
|
19
|
+
consignment_number: Optional[str] = None
|
|
18
20
|
specific_keyword: Optional[str] = None
|
|
19
21
|
order: Optional[str] = None
|
|
20
22
|
date_from: Optional[str] = None
|
|
@@ -39,6 +41,8 @@ class SearchParameters:
|
|
|
39
41
|
"q": str(self.query or ""),
|
|
40
42
|
"party": str(self.party or ""),
|
|
41
43
|
"neutral_citation": str(self.neutral_citation or ""),
|
|
44
|
+
"document_name": str(self.document_name or ""),
|
|
45
|
+
"consignment_number": str(self.consignment_number or ""),
|
|
42
46
|
"specific_keyword": str(self.specific_keyword or ""),
|
|
43
47
|
"order": str(self.order or ""),
|
|
44
48
|
"from": str(self.date_from or ""),
|
caselawclient/types.py
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
|
-
from
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
@dataclass
|
|
5
|
+
class DocumentCategory:
|
|
6
|
+
name: str
|
|
7
|
+
subcategories: list["DocumentCategory"] = field(default_factory=list)
|
|
2
8
|
|
|
3
9
|
|
|
4
10
|
class InvalidDocumentURIException(Exception):
|
|
@@ -63,9 +69,40 @@ class DocumentIdentifierValue(str):
|
|
|
63
69
|
pass
|
|
64
70
|
|
|
65
71
|
|
|
66
|
-
SuccessFailureMessageTuple
|
|
67
|
-
"""
|
|
68
|
-
|
|
72
|
+
class SuccessFailureMessageTuple(tuple[bool, list[str]]):
|
|
73
|
+
"""
|
|
74
|
+
Return whether an operation has succeeded or failed
|
|
75
|
+
(and optionally a list of messages associated with that operation).
|
|
76
|
+
Typically the messages will be exposed to the end-user.
|
|
77
|
+
Use only where a failure is a routine event (such as during validation).
|
|
78
|
+
"""
|
|
79
|
+
|
|
80
|
+
def __new__(cls, success: bool, messages: list[str]) -> "SuccessFailureMessageTuple":
|
|
81
|
+
return super().__new__(cls, [success, messages])
|
|
82
|
+
|
|
83
|
+
@property
|
|
84
|
+
def success(self) -> bool:
|
|
85
|
+
return self[0]
|
|
86
|
+
|
|
87
|
+
@property
|
|
88
|
+
def messages(self) -> list[str]:
|
|
89
|
+
return self[1]
|
|
90
|
+
|
|
91
|
+
def __repr__(self) -> str:
|
|
92
|
+
return f"SuccessFailureMessageTuple({self.success!r}, {self.messages!r})"
|
|
93
|
+
|
|
94
|
+
def __bool__(self) -> bool:
|
|
95
|
+
return self.success
|
|
96
|
+
|
|
97
|
+
def __or__(self, other: "SuccessFailureMessageTuple") -> "SuccessFailureMessageTuple":
|
|
98
|
+
"""Allows us to write combined_tuple = first_tuple | second_tuple"""
|
|
99
|
+
return SuccessFailureMessageTuple(self.success and other.success, self.messages + other.messages)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def SuccessTuple() -> SuccessFailureMessageTuple:
|
|
103
|
+
return SuccessFailureMessageTuple(True, [])
|
|
104
|
+
|
|
69
105
|
|
|
70
|
-
|
|
71
|
-
|
|
106
|
+
def FailureTuple(message: str | list[str]) -> SuccessFailureMessageTuple:
|
|
107
|
+
messages = message if isinstance(message, list) else [message]
|
|
108
|
+
return SuccessFailureMessageTuple(success=False, messages=messages)
|
caselawclient/xml_helpers.py
CHANGED
|
@@ -7,9 +7,25 @@ DEFAULT_NAMESPACES = {
|
|
|
7
7
|
"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
|
|
8
8
|
}
|
|
9
9
|
|
|
10
|
+
# _Element is the only class lxml exposes, so need to use the private class for typing
|
|
11
|
+
Element = etree._Element # noqa: SLF001
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_xpath_nodes(
|
|
15
|
+
node: Element,
|
|
16
|
+
path: str,
|
|
17
|
+
namespaces: Optional[Dict[str, str]] = None,
|
|
18
|
+
) -> list[Element]:
|
|
19
|
+
result = node.xpath(path, namespaces=namespaces)
|
|
20
|
+
|
|
21
|
+
if not isinstance(result, list) or not all(isinstance(x, Element) for x in result):
|
|
22
|
+
raise TypeError(f"Expected to return list[Element], got {type(result).__name__}")
|
|
23
|
+
|
|
24
|
+
return result
|
|
25
|
+
|
|
10
26
|
|
|
11
27
|
def get_xpath_match_string(
|
|
12
|
-
node:
|
|
28
|
+
node: Element,
|
|
13
29
|
path: str,
|
|
14
30
|
namespaces: Optional[Dict[str, str]] = None,
|
|
15
31
|
fallback: str = "",
|
|
@@ -18,7 +34,7 @@ def get_xpath_match_string(
|
|
|
18
34
|
|
|
19
35
|
|
|
20
36
|
def get_xpath_match_strings(
|
|
21
|
-
node:
|
|
37
|
+
node: Element,
|
|
22
38
|
path: str,
|
|
23
39
|
namespaces: Optional[Dict[str, str]] = None,
|
|
24
40
|
) -> list[str]:
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
xquery version "1.0-ml";
|
|
2
|
+
declare namespace akn = "http://docs.oasis-open.org/legaldocml/ns/akn/3.0";
|
|
3
|
+
declare namespace uk = "https://caselaw.nationalarchives.gov.uk/akn";
|
|
4
|
+
declare variable $uri as xs:string external;
|
|
5
|
+
|
|
6
|
+
let $doc := doc($uri)
|
|
7
|
+
let $hash := $doc//uk:hash/text()
|
|
8
|
+
let $count := count(cts:uris(
|
|
9
|
+
(), (),
|
|
10
|
+
cts:and-query((
|
|
11
|
+
cts:element-value-query(xs:QName("uk:hash"), $hash),
|
|
12
|
+
cts:collection-query("http://marklogic.com/collections/dls/latest-version")
|
|
13
|
+
))
|
|
14
|
+
))
|
|
15
|
+
return $count = 1
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
xquery version "1.0-ml";
|
|
2
|
+
|
|
3
|
+
import module namespace dls = "http://marklogic.com/xdmp/dls" at "/MarkLogic/dls.xqy";
|
|
4
|
+
|
|
5
|
+
declare variable $uri as xs:string external;
|
|
6
|
+
declare variable $value as xs:string external;
|
|
7
|
+
declare variable $name as xs:string external;
|
|
8
|
+
|
|
9
|
+
let $props := ( element {$name}{xs:dateTime($value)} )
|
|
10
|
+
|
|
11
|
+
return dls:document-set-property($uri, $props)
|
|
@@ -23,6 +23,11 @@ class BreakJudgmentCheckoutDict(MarkLogicAPIDict):
|
|
|
23
23
|
uri: MarkLogicDocumentURIString
|
|
24
24
|
|
|
25
25
|
|
|
26
|
+
# check_content_hash_unique_by_uri.xqy
|
|
27
|
+
class CheckContentHashUniqueByUriDict(MarkLogicAPIDict):
|
|
28
|
+
uri: MarkLogicDocumentURIString
|
|
29
|
+
|
|
30
|
+
|
|
26
31
|
# checkin_judgment.xqy
|
|
27
32
|
class CheckinJudgmentDict(MarkLogicAPIDict):
|
|
28
33
|
uri: MarkLogicDocumentURIString
|
|
@@ -166,6 +171,13 @@ class SetBooleanPropertyDict(MarkLogicAPIDict):
|
|
|
166
171
|
value: str
|
|
167
172
|
|
|
168
173
|
|
|
174
|
+
# set_datetime_property.xqy
|
|
175
|
+
class SetDatetimePropertyDict(MarkLogicAPIDict):
|
|
176
|
+
name: str
|
|
177
|
+
uri: MarkLogicDocumentURIString
|
|
178
|
+
value: str
|
|
179
|
+
|
|
180
|
+
|
|
169
181
|
# set_metadata_citation.xqy
|
|
170
182
|
class SetMetadataCitationDict(MarkLogicAPIDict):
|
|
171
183
|
content: str
|
|
@@ -1,17 +1,15 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ds-caselaw-marklogic-api-client
|
|
3
|
-
Version:
|
|
3
|
+
Version: 43.1.0
|
|
4
4
|
Summary: An API client for interacting with the underlying data in Find Caselaw.
|
|
5
5
|
Keywords: national archives,caselaw
|
|
6
6
|
Author: The National Archives
|
|
7
|
-
Requires-Python: >=3.
|
|
7
|
+
Requires-Python: >=3.12.0,<4.0.0
|
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
11
9
|
Classifier: Programming Language :: Python :: 3.12
|
|
12
10
|
Classifier: Programming Language :: Python :: 3.13
|
|
13
11
|
Requires-Dist: boto3 (>=1.26.112,<2.0.0)
|
|
14
|
-
Requires-Dist: certifi (>=2025.
|
|
12
|
+
Requires-Dist: certifi (>=2025.10.5,<2025.11.0)
|
|
15
13
|
Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
|
|
16
14
|
Requires-Dist: defusedxml (>=0.7.1,<0.8.0)
|
|
17
15
|
Requires-Dist: django-environ (>=0.12.0)
|
|
@@ -21,6 +19,7 @@ Requires-Dist: lxml (>=6.0.0,<7.0.0)
|
|
|
21
19
|
Requires-Dist: memoization (>=0.4.0,<0.5.0)
|
|
22
20
|
Requires-Dist: mypy-boto3-s3 (>=1.26.104,<2.0.0)
|
|
23
21
|
Requires-Dist: mypy-boto3-sns (>=1.26.69,<2.0.0)
|
|
22
|
+
Requires-Dist: pydantic (>=2.12.3,<3.0.0)
|
|
24
23
|
Requires-Dist: python-dateutil (>=2.9.0-post.0,<3.0.0)
|
|
25
24
|
Requires-Dist: pytz (>2024)
|
|
26
25
|
Requires-Dist: requests (>=2.28.2,<3.0.0)
|
|
@@ -1,21 +1,25 @@
|
|
|
1
|
-
caselawclient/Client.py,sha256=
|
|
1
|
+
caselawclient/Client.py,sha256=OZ7ee3QVvN59go2oHxndgRgAKSMaVAn-mmG7E9rpPgg,47415
|
|
2
2
|
caselawclient/__init__.py,sha256=QZtsOB_GR5XfFnWMJ6E9_fBany-JXFIrQmzs1mD_KVg,1225
|
|
3
|
-
caselawclient/client_helpers/__init__.py,sha256=
|
|
3
|
+
caselawclient/client_helpers/__init__.py,sha256=tpXWjwBAqOf8ChtSiEeMhdkiO7tVbfZ4FfQXsXaGJlI,1221
|
|
4
4
|
caselawclient/client_helpers/search_helpers.py,sha256=R99HyRLeYHgsw2L3DOidEqlKLLvs6Tga5rKTuWQViig,1525
|
|
5
5
|
caselawclient/content_hash.py,sha256=0cPC4OoABq0SC2wYFX9-24DodNigeOqksDxgxQH_hUA,2221
|
|
6
6
|
caselawclient/errors.py,sha256=JC16fEGq_MRJX-_KFzfINCV2Cqx8o6OWOt3C16rQd84,3142
|
|
7
|
-
caselawclient/factories.py,sha256=
|
|
7
|
+
caselawclient/factories.py,sha256=HXJeWpN7__X462joco07ziNqMOOMr-wUPJ91Y69gFk8,7466
|
|
8
8
|
caselawclient/identifier_resolution.py,sha256=B5I1sD7o7YjzsXMECjbKjgiGLDda5bGhejsJ-lYpTIg,2429
|
|
9
|
+
caselawclient/managers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
caselawclient/managers/merge/__init__.py,sha256=Rd6YIGifT3TP6UOf0gBrRoYzK5MJqTPeRaIJznS8dgI,2078
|
|
11
|
+
caselawclient/managers/merge/checks.py,sha256=J9RBG6jZAKIZk27jdFq-BByoRLKWsViCfHDyA8ZM3qU,3205
|
|
9
12
|
caselawclient/models/__init__.py,sha256=kd23EUpvaC7aLHdgk8farqKAQEx3lf7RvNT2jEatvlg,68
|
|
10
|
-
caselawclient/models/documents/__init__.py,sha256=
|
|
11
|
-
caselawclient/models/documents/body.py,sha256=
|
|
13
|
+
caselawclient/models/documents/__init__.py,sha256=6OBZfMy_6FgLQ6ZnMvjKv1Poiq-UP7pb2TcxHx-uSTI,24705
|
|
14
|
+
caselawclient/models/documents/body.py,sha256=pzk3bm9FGIWfI0Hs8dBuzk6RCiA9M4GHfgOYKpNlzyE,8455
|
|
12
15
|
caselawclient/models/documents/comparison.py,sha256=KwFZQByOcYcZKe8csjAntttACKq4BZb28n2VeV5rK54,1355
|
|
13
16
|
caselawclient/models/documents/exceptions.py,sha256=te7PPQTDHjZ9EYVg5pVaiZfF00lMBFy333PHj8_mkC4,443
|
|
14
17
|
caselawclient/models/documents/statuses.py,sha256=Cp4dTQmJOtsU41EJcxy5dV1841pGD2PNWH0VrkDEv4Q,579
|
|
15
18
|
caselawclient/models/documents/transforms/html.xsl,sha256=XyUQLFcJ7_GwthWQ6ShU0bmzrgpl7xDFU-U8VLgOvEs,38258
|
|
16
|
-
caselawclient/models/documents/
|
|
17
|
-
caselawclient/models/
|
|
18
|
-
caselawclient/models/identifiers/
|
|
19
|
+
caselawclient/models/documents/versions.py,sha256=fyDNKCdrTb2N0Ks23YDhmvlXKfLTHnYQCXhnZb-QQbg,3832
|
|
20
|
+
caselawclient/models/documents/xml.py,sha256=uGRULm_XcA9ABZmwTxxwwysPItQl1qnMd2pUVTZprgc,2376
|
|
21
|
+
caselawclient/models/identifiers/__init__.py,sha256=Vp5zJdJSskCuUOUwmPDiDvVlNsYmPRH350-wRx7Q8Dc,7877
|
|
22
|
+
caselawclient/models/identifiers/collection.py,sha256=1fw9yAuHBBMCgAfYRwgpoIPHW_vWQA-eCGDBnWI-gWI,7511
|
|
19
23
|
caselawclient/models/identifiers/exceptions.py,sha256=6LVjvx-UOwqkrpxU19ydmrphKNw0rcG5GXwjTFyf8Dk,130
|
|
20
24
|
caselawclient/models/identifiers/fclid.py,sha256=hj8z-VhXFrUHKOY6k_ItPvOakIvbhJ5xEbZ04E2j7t8,1521
|
|
21
25
|
caselawclient/models/identifiers/neutral_citation.py,sha256=bYAeXHVm_ls0aDTeYI4uv35iZmJGSKU4-H-iLh2xED0,2912
|
|
@@ -26,18 +30,19 @@ caselawclient/models/neutral_citation_mixin.py,sha256=jAac3PPuWyPdj9N-n-U_Jfwkbg
|
|
|
26
30
|
caselawclient/models/parser_logs.py,sha256=iOhKTAAi87XQvxz1DHjF2lrqScD19g_c8EjSf0vPdfs,364
|
|
27
31
|
caselawclient/models/press_summaries.py,sha256=rtrYs_3BazUXxdA2oYmIJ6YIAiVlKeyc1aSF9uvkJJU,2196
|
|
28
32
|
caselawclient/models/utilities/__init__.py,sha256=LPhyrQwLKc5tIJUO8Bysn9wCiR6Z6jMMTksjOV4JH9U,1041
|
|
29
|
-
caselawclient/models/utilities/aws.py,sha256=
|
|
33
|
+
caselawclient/models/utilities/aws.py,sha256=s86_kOpnyc-seefy7eZQsnE4v3b5TZCAVhESiDEpqx8,10084
|
|
30
34
|
caselawclient/models/utilities/dates.py,sha256=WwORxVjUHM1ZFcBF6Qtwo3Cj0sATsnSECkUZ6ls1N1Q,492
|
|
31
35
|
caselawclient/models/utilities/move.py,sha256=MXdUqkSiyqRb8YKs_66B6ICWn8EWM6DiJV95fuJO1Us,3610
|
|
32
36
|
caselawclient/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
37
|
caselawclient/responses/__init__.py,sha256=2-5NJn_PXPTje_W4dHeHYaNRN6vXK4UcB9eLLNUAKa4,67
|
|
34
38
|
caselawclient/responses/search_response.py,sha256=Z76Zj4VvM-EV_vdiehv2-Jfkr9HZD3SvCTlRrUB_cyE,1951
|
|
35
|
-
caselawclient/responses/search_result.py,sha256=
|
|
39
|
+
caselawclient/responses/search_result.py,sha256=glcoCeo2xO-17aV2pcpyfgl0_UjjEUqHfm2kVylXCwk,9782
|
|
36
40
|
caselawclient/responses/xsl/search_match.xsl,sha256=4Sv--MrwBd7J48E9aI7jlFSXGlNi4dBqgzJ3bdMJ_ZU,1018
|
|
37
|
-
caselawclient/search_parameters.py,sha256=
|
|
38
|
-
caselawclient/types.py,sha256=
|
|
39
|
-
caselawclient/xml_helpers.py,sha256=
|
|
41
|
+
caselawclient/search_parameters.py,sha256=BQzDrfxqyZTkqgNj8Ruy-lSr_m4bYrUzUKrqCxB8GrM,3699
|
|
42
|
+
caselawclient/types.py,sha256=Q1RE2HMZUGKTuDzg9wARq9AuVaIIlDyYFE4tqhpcN_U,3630
|
|
43
|
+
caselawclient/xml_helpers.py,sha256=31cxsDu680SFi3gR35rL7EdBZaW6r6mt4zvWHjJeX9o,1131
|
|
40
44
|
caselawclient/xquery/break_judgment_checkout.xqy,sha256=rISzoBKxQKrP5ZRdCSoRqOXW8T_NDBSZRFjOXo_H3ns,220
|
|
45
|
+
caselawclient/xquery/check_content_hash_unique_by_uri.xqy,sha256=kXfJL0sclcCulsaw5KGgVCyuiIfINkSeMwFGXSvuYME,494
|
|
41
46
|
caselawclient/xquery/checkin_judgment.xqy,sha256=QeGqO3kL-q0UrjopCVU0lCbkwbyoc5SuNLYFAIbbyMg,197
|
|
42
47
|
caselawclient/xquery/checkout_judgment.xqy,sha256=aRwVo4KXoEKXfXRZ6IrVfvh0pXK-7pFxVIgEyzE5DRY,385
|
|
43
48
|
caselawclient/xquery/copy_document.xqy,sha256=GwgafibZhUB4rZ7x5wmHAKi0DO1aEWNVithkguwsVGE,453
|
|
@@ -68,6 +73,7 @@ caselawclient/xquery/list_judgment_versions.xqy,sha256=WShga8igeD21hSLfVSvCOiDMP
|
|
|
68
73
|
caselawclient/xquery/resolve_from_identifier_slug.xqy,sha256=jMaOugdG1XVQIk9AR5NOth8D2RS8VEUJuBwjO9j4MFM,485
|
|
69
74
|
caselawclient/xquery/resolve_from_identifier_value.xqy,sha256=7uP3DnRi67qSp0aUhW6Cv_GA8BQGw6GuvtAghjrT7Z4,493
|
|
70
75
|
caselawclient/xquery/set_boolean_property.xqy,sha256=8Vg3yDWqeDynUJQHw2OF4daDIKTnp8ARol1_OCqY0Dk,355
|
|
76
|
+
caselawclient/xquery/set_datetime_property.xqy,sha256=61NuWft1DlpROwdkDLHJ2rcHDqKAFoD45XQ_nmdBkLY,356
|
|
71
77
|
caselawclient/xquery/set_metadata_citation.xqy,sha256=ImwijXowvOCiH_br_LepnKsEpys9tg4Cf3uz6MoC5-c,659
|
|
72
78
|
caselawclient/xquery/set_metadata_court.xqy,sha256=xQGR3e4pdJuDPMlzdAdzrBDSeQbEFiLVIm2z_KQI_Ds,996
|
|
73
79
|
caselawclient/xquery/set_metadata_jurisdiction.xqy,sha256=7iG1uFZOme0_d1hkzLJ870ot_ioFnSwDROfA-_yGtN8,1059
|
|
@@ -84,10 +90,10 @@ caselawclient/xquery/validate_all_documents.xqy,sha256=z_0YEXmRcZ-FaJM0ouKiTjdI4
|
|
|
84
90
|
caselawclient/xquery/validate_document.xqy,sha256=PgaDcnqCRJPIVqfmWsNlXmCLNKd21qkJrvY1RtNP7eA,140
|
|
85
91
|
caselawclient/xquery/xslt.xqy,sha256=w57wNijH3dkwHkpKeAxqjlghVflQwo8cq6jS_sm-erM,199
|
|
86
92
|
caselawclient/xquery/xslt_transform.xqy,sha256=cccaFiGkCcvSfDv007UriZ3I4ak2nTLP1trRZdbOoS8,2462
|
|
87
|
-
caselawclient/xquery_type_dicts.py,sha256=
|
|
93
|
+
caselawclient/xquery_type_dicts.py,sha256=f4PM8yZi5RRMdL2lQ8tsLUs0aJjBa5chvd-VVj40fJY,6767
|
|
88
94
|
caselawclient/xslt/modify_xml_live.xsl,sha256=gNjwBun2-UzOeeuf0wNjFtN3jXm1yrwqv_KT8r1slXw,2370
|
|
89
95
|
caselawclient/xslt/sample.xsl,sha256=IG-v77stjwqiw25pguh391K-5DTKiX651WqILDZixm0,825
|
|
90
|
-
ds_caselaw_marklogic_api_client-
|
|
91
|
-
ds_caselaw_marklogic_api_client-
|
|
92
|
-
ds_caselaw_marklogic_api_client-
|
|
93
|
-
ds_caselaw_marklogic_api_client-
|
|
96
|
+
ds_caselaw_marklogic_api_client-43.1.0.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
|
|
97
|
+
ds_caselaw_marklogic_api_client-43.1.0.dist-info/METADATA,sha256=P-ddB6xJtOQa9vfR5Yl5iFIxXZL2mOgeLRg-YxVQRMY,4408
|
|
98
|
+
ds_caselaw_marklogic_api_client-43.1.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
99
|
+
ds_caselaw_marklogic_api_client-43.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|