ds-caselaw-marklogic-api-client 27.4.0__tar.gz → 28.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/PKG-INFO +1 -1
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/pyproject.toml +1 -1
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/Client.py +78 -2
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/factories.py +3 -2
- ds_caselaw_marklogic_api_client-28.1.0/src/caselawclient/identifier_resolution.py +43 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/models/documents/__init__.py +42 -7
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/models/documents/exceptions.py +4 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/models/documents/xml.py +1 -1
- ds_caselaw_marklogic_api_client-28.1.0/src/caselawclient/models/identifiers/__init__.py +146 -0
- ds_caselaw_marklogic_api_client-28.1.0/src/caselawclient/models/identifiers/neutral_citation.py +49 -0
- ds_caselaw_marklogic_api_client-28.1.0/src/caselawclient/models/identifiers/unpacker.py +44 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/models/judgments.py +7 -6
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/models/press_summaries.py +7 -6
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/models/utilities/__init__.py +2 -2
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/models/utilities/aws.py +2 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xml_helpers.py +2 -4
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/get_components_for_document.xqy +1 -1
- ds_caselaw_marklogic_api_client-28.1.0/src/caselawclient/xquery/get_property_as_node.xqy +9 -0
- ds_caselaw_marklogic_api_client-28.1.0/src/caselawclient/xquery/get_recently_enriched.xqy +18 -0
- ds_caselaw_marklogic_api_client-28.1.0/src/caselawclient/xquery/get_recently_parsed.xqy +19 -0
- ds_caselaw_marklogic_api_client-28.1.0/src/caselawclient/xquery/resolve_from_identifier.xqy +17 -0
- ds_caselaw_marklogic_api_client-28.1.0/src/caselawclient/xquery/set_property_as_node.xqy +11 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery_type_dicts.py +19 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/LICENSE.md +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/README.md +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/client_helpers/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/client_helpers/search_helpers.py +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/content_hash.py +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/errors.py +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/models/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/models/documents/body.py +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/models/documents/statuses.py +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/models/documents/transforms/html.xsl +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/models/neutral_citation_mixin.py +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/models/utilities/dates.py +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/models/utilities/move.py +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/py.typed +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/responses/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/responses/search_response.py +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/responses/search_result.py +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/responses/xsl/search_match.xsl +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/search_parameters.py +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/break_judgment_checkout.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/checkin_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/checkout_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/copy_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/delete_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/document_collections.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/document_exists.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/get_combined_stats_table.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/get_highest_enrichment_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/get_highest_parser_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/get_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/get_judgment_checkout_status.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/get_judgment_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/get_last_modified.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/get_pending_parse_for_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/get_properties_for_search_results.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/get_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/get_version_annotation.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/get_version_created.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/insert_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/list_judgment_versions.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/set_boolean_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/set_metadata_citation.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/set_metadata_court.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/set_metadata_jurisdiction.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/set_metadata_name.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/set_metadata_this_uri.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/set_metadata_work_expression_date.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/set_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/update_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/update_locked_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/user_has_privilege.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/user_has_role.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/validate_all_documents.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/validate_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/xslt.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/src/caselawclient/xquery/xslt_transform.xqy +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ds-caselaw-marklogic-api-client
|
|
3
|
-
Version:
|
|
3
|
+
Version: 28.1.0
|
|
4
4
|
Summary: An API client for interacting with the underlying data in Find Caselaw.
|
|
5
5
|
Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
|
|
6
6
|
Keywords: national archives,caselaw
|
{ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/pyproject.toml
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "ds-caselaw-marklogic-api-client"
|
|
3
|
-
version = "
|
|
3
|
+
version = "28.1.0"
|
|
4
4
|
description = "An API client for interacting with the underlying data in Find Caselaw."
|
|
5
5
|
authors = ["The National Archives"]
|
|
6
6
|
homepage = "https://github.com/nationalarchives/ds-caselaw-custom-api-client"
|
|
@@ -13,12 +13,14 @@ from xml.etree.ElementTree import Element, ParseError, fromstring
|
|
|
13
13
|
import environ
|
|
14
14
|
import requests
|
|
15
15
|
from ds_caselaw_utils.types import NeutralCitationString
|
|
16
|
+
from lxml import etree
|
|
16
17
|
from requests.auth import HTTPBasicAuth
|
|
17
18
|
from requests.structures import CaseInsensitiveDict
|
|
18
19
|
from requests_toolbelt.multipart import decoder
|
|
19
20
|
|
|
20
21
|
from caselawclient import xquery_type_dicts as query_dicts
|
|
21
22
|
from caselawclient.client_helpers import VersionAnnotation
|
|
23
|
+
from caselawclient.identifier_resolution import IdentifierResolutions
|
|
22
24
|
from caselawclient.models.documents import (
|
|
23
25
|
DOCUMENT_COLLECTION_URI_JUDGMENT,
|
|
24
26
|
DOCUMENT_COLLECTION_URI_PRESS_SUMMARY,
|
|
@@ -209,12 +211,14 @@ class MarklogicApiClient:
|
|
|
209
211
|
Returns a list of PressSummary objects associated with a given Document URI
|
|
210
212
|
"""
|
|
211
213
|
vars: query_dicts.GetComponentsForDocumentDict = {
|
|
212
|
-
"parent_uri":
|
|
214
|
+
"parent_uri": uri,
|
|
213
215
|
"component": "pressSummary",
|
|
214
216
|
}
|
|
215
217
|
response = self._send_to_eval(vars, "get_components_for_document.xqy")
|
|
216
218
|
uris = get_multipart_strings_from_marklogic_response(response)
|
|
217
|
-
return [
|
|
219
|
+
return [
|
|
220
|
+
PressSummary(DocumentURIString(uri.strip("/").strip(".xml")), self) for uri in uris
|
|
221
|
+
] # TODO: Migrate this strip behaviour into proper manipulation of a MarkLogicURIString
|
|
218
222
|
|
|
219
223
|
def get_document_by_uri(
|
|
220
224
|
self,
|
|
@@ -862,6 +866,17 @@ class MarklogicApiClient:
|
|
|
862
866
|
}
|
|
863
867
|
return self._eval_and_decode(vars, "get_property.xqy")
|
|
864
868
|
|
|
869
|
+
def get_property_as_node(self, judgment_uri: DocumentURIString, name: str) -> Optional[etree._Element]:
|
|
870
|
+
uri = self._format_uri_for_marklogic(judgment_uri)
|
|
871
|
+
vars: query_dicts.GetPropertyAsNodeDict = {
|
|
872
|
+
"uri": uri,
|
|
873
|
+
"name": name,
|
|
874
|
+
}
|
|
875
|
+
value = self._eval_and_decode(vars, "get_property_as_node.xqy")
|
|
876
|
+
if not value:
|
|
877
|
+
return None
|
|
878
|
+
return etree.fromstring(value)
|
|
879
|
+
|
|
865
880
|
def get_version_annotation(self, judgment_uri: DocumentURIString) -> str:
|
|
866
881
|
uri = self._format_uri_for_marklogic(judgment_uri)
|
|
867
882
|
vars: query_dicts.GetVersionAnnotationDict = {
|
|
@@ -894,6 +909,22 @@ class MarklogicApiClient:
|
|
|
894
909
|
|
|
895
910
|
return self._send_to_eval(vars, "set_property.xqy")
|
|
896
911
|
|
|
912
|
+
def set_property_as_node(
|
|
913
|
+
self,
|
|
914
|
+
judgment_uri: DocumentURIString,
|
|
915
|
+
name: str,
|
|
916
|
+
value: etree._Element,
|
|
917
|
+
) -> requests.Response:
|
|
918
|
+
"""Given a root node, set the value of the MarkLogic property for a document to the _contents_ of that root node. The root node itself is discarded."""
|
|
919
|
+
uri = self._format_uri_for_marklogic(judgment_uri)
|
|
920
|
+
vars: query_dicts.SetPropertyAsNodeDict = {
|
|
921
|
+
"uri": uri,
|
|
922
|
+
"value": etree.tostring(value).decode(),
|
|
923
|
+
"name": name,
|
|
924
|
+
}
|
|
925
|
+
|
|
926
|
+
return self._send_to_eval(vars, "set_property_as_node.xqy")
|
|
927
|
+
|
|
897
928
|
def set_boolean_property(
|
|
898
929
|
self,
|
|
899
930
|
judgment_uri: DocumentURIString,
|
|
@@ -1107,6 +1138,21 @@ class MarklogicApiClient:
|
|
|
1107
1138
|
|
|
1108
1139
|
return results
|
|
1109
1140
|
|
|
1141
|
+
def get_recently_enriched(
|
|
1142
|
+
self,
|
|
1143
|
+
) -> list[list[Any]]:
|
|
1144
|
+
"""Retrieve documents which have recently been enriched."""
|
|
1145
|
+
results: list[list[Any]] = json.loads(
|
|
1146
|
+
get_single_string_from_marklogic_response(
|
|
1147
|
+
self._send_to_eval(
|
|
1148
|
+
{},
|
|
1149
|
+
"get_recently_enriched.xqy",
|
|
1150
|
+
),
|
|
1151
|
+
),
|
|
1152
|
+
)
|
|
1153
|
+
|
|
1154
|
+
return results
|
|
1155
|
+
|
|
1110
1156
|
def get_highest_parser_version(self) -> tuple[int, int]:
|
|
1111
1157
|
"""This gets the highest parser version in the database, so if nothing has been parsed with the most recent version of the parser, this won't reflect that change."""
|
|
1112
1158
|
table = json.loads(
|
|
@@ -1141,3 +1187,33 @@ class MarklogicApiClient:
|
|
|
1141
1187
|
)
|
|
1142
1188
|
|
|
1143
1189
|
return results
|
|
1190
|
+
|
|
1191
|
+
def get_recently_parsed(
|
|
1192
|
+
self,
|
|
1193
|
+
) -> list[list[Any]]:
|
|
1194
|
+
"""Retrieve documents which have recently been parsed."""
|
|
1195
|
+
results: list[list[Any]] = json.loads(
|
|
1196
|
+
get_single_string_from_marklogic_response(
|
|
1197
|
+
self._send_to_eval(
|
|
1198
|
+
{},
|
|
1199
|
+
"get_recently_parsed.xqy",
|
|
1200
|
+
),
|
|
1201
|
+
),
|
|
1202
|
+
)
|
|
1203
|
+
|
|
1204
|
+
return results
|
|
1205
|
+
|
|
1206
|
+
def resolve_from_identifier(self, identifier_uri: str, published_only: bool = True) -> IdentifierResolutions:
|
|
1207
|
+
"""Given a PUI/EUI url, look up the precomputed slug and return the
|
|
1208
|
+
MarkLogic document URIs which match that slug. Multiple returns should be anticipated"""
|
|
1209
|
+
vars: query_dicts.ResolveFromIdentifierDict = {
|
|
1210
|
+
"identifier_uri": DocumentURIString(identifier_uri),
|
|
1211
|
+
"published_only": int(published_only),
|
|
1212
|
+
}
|
|
1213
|
+
raw_results: list[str] = get_multipart_strings_from_marklogic_response(
|
|
1214
|
+
self._send_to_eval(
|
|
1215
|
+
vars,
|
|
1216
|
+
"resolve_from_identifier.xqy",
|
|
1217
|
+
),
|
|
1218
|
+
)
|
|
1219
|
+
return IdentifierResolutions.from_marklogic_output(raw_results)
|
|
@@ -5,7 +5,7 @@ from unittest.mock import Mock
|
|
|
5
5
|
from typing_extensions import TypeAlias
|
|
6
6
|
|
|
7
7
|
from caselawclient.Client import MarklogicApiClient
|
|
8
|
-
from caselawclient.models.documents import Document
|
|
8
|
+
from caselawclient.models.documents import Document, DocumentURIString
|
|
9
9
|
from caselawclient.models.documents.body import DocumentBody
|
|
10
10
|
from caselawclient.models.judgments import Judgment
|
|
11
11
|
from caselawclient.models.press_summaries import PressSummary
|
|
@@ -54,7 +54,7 @@ class DocumentFactory:
|
|
|
54
54
|
@classmethod
|
|
55
55
|
def build(
|
|
56
56
|
cls,
|
|
57
|
-
uri:
|
|
57
|
+
uri: DocumentURIString = DocumentURIString("test/2023/123"),
|
|
58
58
|
html: str = "<p>This is a judgment.</p>",
|
|
59
59
|
api_client: Optional[MarklogicApiClient] = None,
|
|
60
60
|
**kwargs: Any,
|
|
@@ -62,6 +62,7 @@ class DocumentFactory:
|
|
|
62
62
|
if not api_client:
|
|
63
63
|
api_client = Mock(spec=MarklogicApiClient)
|
|
64
64
|
api_client.get_judgment_xml_bytestring.return_value = DEFAULT_DOCUMENT_BODY_XML.encode(encoding="utf-8")
|
|
65
|
+
api_client.get_property_as_node.return_value = None
|
|
65
66
|
|
|
66
67
|
document = cls.target_class(uri, api_client=api_client)
|
|
67
68
|
document.content_as_html = Mock(return_value=html) # type: ignore[method-assign]
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import NamedTuple
|
|
3
|
+
|
|
4
|
+
from caselawclient.models.documents import DocumentURIString
|
|
5
|
+
from caselawclient.xquery_type_dicts import MarkLogicDocumentURIString
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class IdentifierResolutions(list["IdentifierResolution"]):
|
|
9
|
+
"""
|
|
10
|
+
A list of candidate MarkLogic documents which correspond to a Public UI uri
|
|
11
|
+
|
|
12
|
+
MarkLogic returns a list of dictionaries; IdentifierResolution handles a single dictionary
|
|
13
|
+
which corresponds to a single identifier to MarkLogic document mapping.
|
|
14
|
+
|
|
15
|
+
see `xquery/resolve_from_identifier.xqy` and `resolve_from_identifier` in `Client.py`
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
@staticmethod
|
|
19
|
+
def from_marklogic_output(table: list[str]) -> "IdentifierResolutions":
|
|
20
|
+
return IdentifierResolutions(list(IdentifierResolution.from_marklogic_output(row) for row in table))
|
|
21
|
+
|
|
22
|
+
def published(self) -> "IdentifierResolutions":
|
|
23
|
+
"Filter the list so that only published documents are returned"
|
|
24
|
+
return IdentifierResolutions(list(x for x in self if x.document_published))
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class IdentifierResolution(NamedTuple):
|
|
28
|
+
"""A single response from MarkLogic about a single identifier / document mapping"""
|
|
29
|
+
|
|
30
|
+
identifier_uuid: str
|
|
31
|
+
document_uri: MarkLogicDocumentURIString
|
|
32
|
+
identifier_slug: DocumentURIString
|
|
33
|
+
document_published: bool
|
|
34
|
+
|
|
35
|
+
@staticmethod
|
|
36
|
+
def from_marklogic_output(raw_row: str) -> "IdentifierResolution":
|
|
37
|
+
row = json.loads(raw_row)
|
|
38
|
+
return IdentifierResolution(
|
|
39
|
+
identifier_uuid=row["documents.compiled_url_slugs.identifier_uuid"],
|
|
40
|
+
document_uri=MarkLogicDocumentURIString(row["documents.compiled_url_slugs.document_uri"]),
|
|
41
|
+
identifier_slug=DocumentURIString(row["documents.compiled_url_slugs.identifier_slug"]),
|
|
42
|
+
document_published=row["documents.compiled_url_slugs.document_published"] == "true",
|
|
43
|
+
)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import datetime
|
|
2
2
|
import warnings
|
|
3
3
|
from functools import cached_property
|
|
4
|
-
from typing import TYPE_CHECKING, Any,
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
5
5
|
|
|
6
6
|
from ds_caselaw_utils import courts
|
|
7
7
|
from ds_caselaw_utils.courts import CourtNotFoundException
|
|
@@ -15,6 +15,7 @@ from caselawclient.errors import (
|
|
|
15
15
|
NotSupportedOnVersion,
|
|
16
16
|
OnlySupportedOnVersion,
|
|
17
17
|
)
|
|
18
|
+
from caselawclient.models.identifiers.unpacker import unpack_all_identifiers_from_etree
|
|
18
19
|
from caselawclient.models.utilities import VersionsDict, extract_version, render_versions
|
|
19
20
|
from caselawclient.models.utilities.aws import (
|
|
20
21
|
ParserInstructionsDict,
|
|
@@ -30,7 +31,7 @@ from caselawclient.models.utilities.aws import (
|
|
|
30
31
|
)
|
|
31
32
|
|
|
32
33
|
from .body import DocumentBody
|
|
33
|
-
from .exceptions import CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion
|
|
34
|
+
from .exceptions import CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion, InvalidDocumentURIException
|
|
34
35
|
from .statuses import DOCUMENT_STATUS_HOLD, DOCUMENT_STATUS_IN_PROGRESS, DOCUMENT_STATUS_NEW, DOCUMENT_STATUS_PUBLISHED
|
|
35
36
|
|
|
36
37
|
MINIMUM_ENRICHMENT_TIME = datetime.timedelta(minutes=20)
|
|
@@ -47,7 +48,26 @@ if TYPE_CHECKING:
|
|
|
47
48
|
from caselawclient.Client import MarklogicApiClient
|
|
48
49
|
|
|
49
50
|
|
|
50
|
-
DocumentURIString
|
|
51
|
+
class DocumentURIString(str):
|
|
52
|
+
"""
|
|
53
|
+
This class checks that the string is actually a valid Document URI on creation. It does _not_ manipulate the string.
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
def __new__(cls, content: str) -> "DocumentURIString":
|
|
57
|
+
# Check that the URI doesn't begin or end with a slash
|
|
58
|
+
if content[0] == "/" or content[-1] == "/":
|
|
59
|
+
raise InvalidDocumentURIException(
|
|
60
|
+
f'"{content}" is not a valid document URI; URIs cannot begin or end with slashes.'
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
# Check that the URI doesn't contain a full stop
|
|
64
|
+
if "." in content:
|
|
65
|
+
raise InvalidDocumentURIException(
|
|
66
|
+
f'"{content}" is not a valid document URI; URIs cannot contain full stops.'
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
# If everything is good, return as usual
|
|
70
|
+
return str.__new__(cls, content)
|
|
51
71
|
|
|
52
72
|
|
|
53
73
|
class Document:
|
|
@@ -105,13 +125,15 @@ class Document:
|
|
|
105
125
|
Individual document classes should extend this list where necessary to validate document type-specific attributes.
|
|
106
126
|
"""
|
|
107
127
|
|
|
108
|
-
def __init__(self, uri:
|
|
128
|
+
def __init__(self, uri: DocumentURIString, api_client: "MarklogicApiClient", search_query: Optional[str] = None):
|
|
109
129
|
"""
|
|
110
|
-
:param uri:
|
|
130
|
+
:param uri: The URI of the document to retrieve from MarkLogic.
|
|
131
|
+
:param api_client: An instance of the API client object to handle communication with the MarkLogic server.
|
|
132
|
+
:param search_query: Optionally, a search string which should be highlighted if it appears in the document body.
|
|
111
133
|
|
|
112
134
|
:raises DocumentNotFoundError: The document does not exist within MarkLogic
|
|
113
135
|
"""
|
|
114
|
-
self.uri: DocumentURIString =
|
|
136
|
+
self.uri: DocumentURIString = uri
|
|
115
137
|
self.api_client: MarklogicApiClient = api_client
|
|
116
138
|
if not self.document_exists():
|
|
117
139
|
raise DocumentNotFoundError(f"Document {self.uri} does not exist")
|
|
@@ -123,7 +145,9 @@ class Document:
|
|
|
123
145
|
search_query=search_query,
|
|
124
146
|
),
|
|
125
147
|
)
|
|
126
|
-
""" `Document.body` represents the
|
|
148
|
+
""" `Document.body` represents the body of the document itself, without any information such as version tracking or properties. """
|
|
149
|
+
|
|
150
|
+
self._initialise_identifiers()
|
|
127
151
|
|
|
128
152
|
def __repr__(self) -> str:
|
|
129
153
|
name = self.body.name or "un-named"
|
|
@@ -139,6 +163,12 @@ class Document:
|
|
|
139
163
|
"""There is a docx in S3 private bucket for this Document"""
|
|
140
164
|
return check_docx_exists(self.uri)
|
|
141
165
|
|
|
166
|
+
def _initialise_identifiers(self) -> None:
|
|
167
|
+
"""Load this document's identifiers from MarkLogic."""
|
|
168
|
+
|
|
169
|
+
identifiers_element_as_etree = self.api_client.get_property_as_node(self.uri, "identifiers")
|
|
170
|
+
self.identifiers = unpack_all_identifiers_from_etree(identifiers_element_as_etree)
|
|
171
|
+
|
|
142
172
|
@property
|
|
143
173
|
def best_human_identifier(self) -> Optional[str]:
|
|
144
174
|
"""
|
|
@@ -500,6 +530,11 @@ class Document:
|
|
|
500
530
|
"""
|
|
501
531
|
return self.docx_exists()
|
|
502
532
|
|
|
533
|
+
def save_identifiers(self) -> None:
|
|
534
|
+
"""Save the current state of this Document's identifiers to MarkLogic."""
|
|
535
|
+
self.identifiers.validate()
|
|
536
|
+
self.api_client.set_property_as_node(self.uri, "identifiers", self.identifiers.as_etree)
|
|
537
|
+
|
|
503
538
|
def __getattr__(self, name: str) -> Any:
|
|
504
539
|
warnings.warn(f"{name} no longer exists on Document, using Document.body instead", DeprecationWarning)
|
|
505
540
|
try:
|
|
@@ -17,7 +17,7 @@ class XML:
|
|
|
17
17
|
:raises NonXMLDocumentError: This document is not valid XML
|
|
18
18
|
"""
|
|
19
19
|
try:
|
|
20
|
-
self.xml_as_tree: etree.
|
|
20
|
+
self.xml_as_tree: etree._Element = etree.fromstring(xml_bytestring)
|
|
21
21
|
except etree.XMLSyntaxError:
|
|
22
22
|
raise NonXMLDocumentError
|
|
23
23
|
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Any, Optional, Union
|
|
3
|
+
from uuid import uuid4
|
|
4
|
+
|
|
5
|
+
from lxml import etree
|
|
6
|
+
|
|
7
|
+
IDENTIFIER_PACKABLE_ATTRIBUTES: list[str] = [
|
|
8
|
+
"uuid",
|
|
9
|
+
"value",
|
|
10
|
+
"url_slug",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
IDENTIFIER_UNPACKABLE_ATTRIBUTES: list[str] = [
|
|
14
|
+
"uuid",
|
|
15
|
+
"value",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class InvalidIdentifierXMLRepresentationException(Exception):
|
|
20
|
+
pass
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class UUIDMismatchError(Exception):
|
|
24
|
+
pass
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class IdentifierSchema(ABC):
|
|
28
|
+
"""
|
|
29
|
+
A base class which describes what an identifier schema should look like.
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
name: str
|
|
33
|
+
namespace: str
|
|
34
|
+
|
|
35
|
+
def __init_subclass__(cls: type["IdentifierSchema"], **kwargs: Any) -> None:
|
|
36
|
+
"""Ensure that subclasses have the required attributes set."""
|
|
37
|
+
for required in (
|
|
38
|
+
"name",
|
|
39
|
+
"namespace",
|
|
40
|
+
):
|
|
41
|
+
if not getattr(cls, required, False):
|
|
42
|
+
raise NotImplementedError(f"Can't instantiate IdentifierSchema without {required} attribute.")
|
|
43
|
+
super().__init_subclass__(**kwargs)
|
|
44
|
+
|
|
45
|
+
def __repr__(self) -> str:
|
|
46
|
+
return self.name
|
|
47
|
+
|
|
48
|
+
@classmethod
|
|
49
|
+
@abstractmethod
|
|
50
|
+
def validate_identifier(cls, value: str) -> bool:
|
|
51
|
+
"""Check that any given identifier value is valid for this schema."""
|
|
52
|
+
pass
|
|
53
|
+
|
|
54
|
+
@classmethod
|
|
55
|
+
@abstractmethod
|
|
56
|
+
def compile_identifier_url_slug(cls, value: str) -> str:
|
|
57
|
+
"""Convert an identifier into a precompiled URL slug."""
|
|
58
|
+
pass
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class Identifier(ABC):
|
|
62
|
+
"""A base class for subclasses representing a concrete identifier."""
|
|
63
|
+
|
|
64
|
+
schema: type[IdentifierSchema]
|
|
65
|
+
|
|
66
|
+
uuid: str
|
|
67
|
+
value: str
|
|
68
|
+
|
|
69
|
+
def __init_subclass__(cls: type["Identifier"], **kwargs: Any) -> None:
|
|
70
|
+
"""Ensure that subclasses have the required attributes set."""
|
|
71
|
+
for required in ("schema",):
|
|
72
|
+
if not getattr(cls, required, False):
|
|
73
|
+
raise NotImplementedError(f"Can't instantiate Identifier without {required} attribute.")
|
|
74
|
+
super().__init_subclass__(**kwargs)
|
|
75
|
+
|
|
76
|
+
def __repr__(self) -> str:
|
|
77
|
+
return f"<{self.schema.name} {self.value}: {self.uuid}>"
|
|
78
|
+
|
|
79
|
+
def __init__(self, value: str, uuid: Optional[str] = None) -> None:
|
|
80
|
+
self.value = value
|
|
81
|
+
if uuid:
|
|
82
|
+
self.uuid = uuid
|
|
83
|
+
else:
|
|
84
|
+
self.uuid = "id-" + str(uuid4())
|
|
85
|
+
|
|
86
|
+
@property
|
|
87
|
+
def as_xml_tree(self) -> etree._Element:
|
|
88
|
+
"""Convert this Identifier into a packed XML representation for storage."""
|
|
89
|
+
identifier_root = etree.Element("identifier")
|
|
90
|
+
|
|
91
|
+
namespace_attribute = etree.SubElement(identifier_root, "namespace")
|
|
92
|
+
namespace_attribute.text = self.schema.namespace
|
|
93
|
+
|
|
94
|
+
for attribute in IDENTIFIER_PACKABLE_ATTRIBUTES:
|
|
95
|
+
packed_attribute = etree.SubElement(identifier_root, attribute)
|
|
96
|
+
packed_attribute.text = getattr(self, attribute)
|
|
97
|
+
|
|
98
|
+
return identifier_root
|
|
99
|
+
|
|
100
|
+
@property
|
|
101
|
+
def url_slug(self) -> str:
|
|
102
|
+
return self.schema.compile_identifier_url_slug(self.value)
|
|
103
|
+
|
|
104
|
+
def same_as(self, other: "Identifier") -> bool:
|
|
105
|
+
"Is this the same as another identifier (in value and schema)?"
|
|
106
|
+
return self.value == other.value and self.schema == other.schema
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class Identifiers(dict[str, Identifier]):
|
|
110
|
+
def validate(self) -> None:
|
|
111
|
+
for uuid, identifier in self.items():
|
|
112
|
+
if uuid != identifier.uuid:
|
|
113
|
+
msg = "Key of {identifier} in Identifiers is {uuid} not {identifier.uuid}"
|
|
114
|
+
raise UUIDMismatchError(msg)
|
|
115
|
+
|
|
116
|
+
def contains(self, other_identifier: Identifier) -> bool:
|
|
117
|
+
"Do the identifier's value and namespace already exist in this group?"
|
|
118
|
+
return any(other_identifier.same_as(identifier) for identifier in self.values())
|
|
119
|
+
|
|
120
|
+
def add(self, identifier: Identifier) -> None:
|
|
121
|
+
if not self.contains(identifier):
|
|
122
|
+
self[identifier.uuid] = identifier
|
|
123
|
+
|
|
124
|
+
def __delitem__(self, key: Union[Identifier, str]) -> None:
|
|
125
|
+
if isinstance(key, Identifier):
|
|
126
|
+
super().__delitem__(key.uuid)
|
|
127
|
+
else:
|
|
128
|
+
super().__delitem__(key)
|
|
129
|
+
|
|
130
|
+
def delete_type(self, deleted_identifier_type: type[Identifier]) -> None:
|
|
131
|
+
"For when we want an identifier to be the only valid identifier of that type, delete the others first"
|
|
132
|
+
uuids = self.keys()
|
|
133
|
+
for uuid in list(uuids):
|
|
134
|
+
# we could compare against .schema instead, which would have different behaviour for subclasses
|
|
135
|
+
if isinstance(self[uuid], deleted_identifier_type):
|
|
136
|
+
del self[uuid]
|
|
137
|
+
|
|
138
|
+
@property
|
|
139
|
+
def as_etree(self) -> etree._Element:
|
|
140
|
+
"""Return an etree representation of all the Document's identifiers."""
|
|
141
|
+
identifiers_root = etree.Element("identifiers")
|
|
142
|
+
|
|
143
|
+
for identifier in self.values():
|
|
144
|
+
identifiers_root.append(identifier.as_xml_tree)
|
|
145
|
+
|
|
146
|
+
return identifiers_root
|
ds_caselaw_marklogic_api_client-28.1.0/src/caselawclient/models/identifiers/neutral_citation.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
from ds_caselaw_utils import neutral_url
|
|
4
|
+
from ds_caselaw_utils.types import NeutralCitationString
|
|
5
|
+
|
|
6
|
+
from . import Identifier, IdentifierSchema
|
|
7
|
+
|
|
8
|
+
VALID_NCN_PATTERN = re.compile(r"(^\[([0-9]{4})\] ([a-zA-Z]+)(?: ([a-zA-Z]+))? ([0-9]+)(?: \(([a-zA-Z]+)\))?$)")
"""
This is a catch-all pattern for anything which looks like a Neutral Citation, even if the court itself isn't valid. Checking that an NCN is plausibly correct is handled elsewhere.

This pattern also defines six capture groups to standardise how we interface with the elements. Numbered as for `Match.group()` (subtract one to index into `Match.groups()`):

- `1`: The entire citation
- `2`: The year of the decision
- `3`: The court
- `4`: (Optionally) the jurisdiction or division which follows the court, depending on the court
- `5`: The sequence number of the decision
- `6`: (Optionally) the parenthesised jurisdiction or division which follows the sequence number, depending on the court

Optional groups which do not take part in a match are `None`.

TODO: When these capture groups are being used in anger (eg to build URL slugs) you should go through and name the groups.
"""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class NeutralCitationNumberSchema(IdentifierSchema):
    """
    Schema for identifiers which are Neutral Citation Numbers.

    https://www.iclr.co.uk/knowledge/case-law/neutral-citations/
    """

    name = "Neutral Citation Number"
    namespace = "ukncn"

    @classmethod
    def validate_identifier(cls, value: str) -> bool:
        """Return True when ``value`` matches ``VALID_NCN_PATTERN``, False otherwise."""
        return VALID_NCN_PATTERN.match(value) is not None

    @classmethod
    def compile_identifier_url_slug(cls, value: str) -> str:
        """Convert the citation ``value`` into its URL slug using ``neutral_url``.

        Raises:
            Exception: if ``neutral_url`` returns a falsy result for ``value``.
        """
        # TODO: At some point this should move out of utils and into this class.
        slug = neutral_url(NeutralCitationString(value))
        if slug:
            return slug
        raise Exception(f"Unable to convert NCN {value} into NCN-based URL slug")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class NeutralCitationNumber(Identifier):
    """An Identifier whose schema is ``NeutralCitationNumberSchema``."""

    # The schema class bound to this identifier type.
    schema = NeutralCitationNumberSchema
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from lxml import etree
|
|
4
|
+
|
|
5
|
+
from . import IDENTIFIER_UNPACKABLE_ATTRIBUTES, Identifier, Identifiers, InvalidIdentifierXMLRepresentationException
|
|
6
|
+
from .neutral_citation import NeutralCitationNumber
|
|
7
|
+
|
|
8
|
+
# Maps the text of an identifier's <namespace> element to the Identifier subclass used to unpack it.
IDENTIFIER_NAMESPACE_MAP: dict[str, type[Identifier]] = {
    "ukncn": NeutralCitationNumber,
}
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def unpack_all_identifiers_from_etree(identifiers_etree: Optional[etree._Element]) -> Identifiers:
    """Unpack every ``<identifier>`` child of the given ``<identifiers>`` element into an Identifiers collection.

    Passing ``None`` yields an empty collection.
    """
    unpacked = Identifiers()
    if identifiers_etree is not None:
        for child in identifiers_etree.findall("identifier"):
            unpacked.add(unpack_an_identifier_from_etree(child))
    return unpacked
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def unpack_an_identifier_from_etree(identifier_xml: etree._Element) -> Identifier:
    """Given an etree representation of a single identifier, unpack it into an appropriate instance of an Identifier.

    The text of the ``<namespace>`` child selects the Identifier subclass from ``IDENTIFIER_NAMESPACE_MAP``.
    Each name in ``IDENTIFIER_UNPACKABLE_ATTRIBUTES`` is read from the child element of the same name and
    passed to that class as a keyword argument.

    Raises:
        InvalidIdentifierXMLRepresentationException: if the namespace, or any unpackable attribute, is absent or has no text.
        KeyError: if the namespace is not a key of ``IDENTIFIER_NAMESPACE_MAP``.
    """

    namespace_element = identifier_xml.find("namespace")

    if namespace_element is None or not namespace_element.text:
        raise InvalidIdentifierXMLRepresentationException(
            "Identifer XML representation is not valid: namespace not present or empty"
        )

    kwargs: dict[str, str] = {}

    for attribute in IDENTIFIER_UNPACKABLE_ATTRIBUTES:
        element = identifier_xml.find(attribute)
        if element is None or not element.text:
            # Report the attribute name we searched for: at this point `element` is either None or an
            # lxml element object, neither of which tells the reader what was missing.
            raise InvalidIdentifierXMLRepresentationException(
                f"Identifer XML representation is not valid: {attribute} not present or empty"
            )
        kwargs[attribute] = element.text

    return IDENTIFIER_NAMESPACE_MAP[namespace_element.text](**kwargs)
|
|
@@ -10,7 +10,7 @@ from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
11
|
from caselawclient.models.press_summaries import PressSummary
|
|
12
12
|
|
|
13
|
-
from .documents import Document
|
|
13
|
+
from .documents import Document, DocumentURIString
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class Judgment(NeutralCitationMixin, Document):
|
|
@@ -21,8 +21,8 @@ class Judgment(NeutralCitationMixin, Document):
|
|
|
21
21
|
document_noun = "judgment"
|
|
22
22
|
document_noun_plural = "judgments"
|
|
23
23
|
|
|
24
|
-
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
|
25
|
-
super().__init__(self.document_noun, *args, **kwargs)
|
|
24
|
+
def __init__(self, uri: DocumentURIString, *args: Any, **kwargs: Any) -> None:
    """Initialise the document at ``uri``.

    Passes this class's ``document_noun`` as the first argument to the parent initialiser, followed by
    ``uri`` and any further positional and keyword arguments.
    """
    super().__init__(self.document_noun, uri, *args, **kwargs)
|
|
26
26
|
|
|
27
27
|
@cached_property
|
|
28
28
|
def neutral_citation(self) -> NeutralCitationString:
|
|
@@ -46,8 +46,9 @@ class Judgment(NeutralCitationMixin, Document):
|
|
|
46
46
|
Attempt to fetch a linked press summary, and return it, if it exists
|
|
47
47
|
"""
|
|
48
48
|
try:
|
|
49
|
-
uri = self.uri + "/press-summary/1"
|
|
50
|
-
|
|
51
|
-
|
|
49
|
+
uri = DocumentURIString(self.uri + "/press-summary/1")
|
|
50
|
+
if not TYPE_CHECKING: # This isn't nice, but will be cleaned up when we refactor how related documents work
|
|
51
|
+
PressSummary = importlib.import_module("caselawclient.models.press_summaries").PressSummary
|
|
52
|
+
return PressSummary(uri, self.api_client)
|
|
52
53
|
except DocumentNotFoundError:
|
|
53
54
|
return None
|
|
@@ -9,7 +9,7 @@ from ds_caselaw_utils.types import NeutralCitationString
|
|
|
9
9
|
from caselawclient.errors import DocumentNotFoundError
|
|
10
10
|
from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
|
|
11
11
|
|
|
12
|
-
from .documents import Document
|
|
12
|
+
from .documents import Document, DocumentURIString
|
|
13
13
|
|
|
14
14
|
if TYPE_CHECKING:
|
|
15
15
|
from caselawclient.models.judgments import Judgment
|
|
@@ -23,8 +23,8 @@ class PressSummary(NeutralCitationMixin, Document):
|
|
|
23
23
|
document_noun = "press summary"
|
|
24
24
|
document_noun_plural = "press summaries"
|
|
25
25
|
|
|
26
|
-
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
|
27
|
-
super().__init__(self.document_noun, *args, **kwargs)
|
|
26
|
+
def __init__(self, uri: DocumentURIString, *args: Any, **kwargs: Any) -> None:
    """Initialise the document at ``uri``.

    Passes this class's ``document_noun`` as the first argument to the parent initialiser, followed by
    ``uri`` and any further positional and keyword arguments.
    """
    super().__init__(self.document_noun, uri, *args, **kwargs)
|
|
28
28
|
|
|
29
29
|
@cached_property
|
|
30
30
|
def neutral_citation(self) -> NeutralCitationString:
|
|
@@ -47,8 +47,9 @@ class PressSummary(NeutralCitationMixin, Document):
|
|
|
47
47
|
Attempt to fetch a linked judgement, and return it, if it exists
|
|
48
48
|
"""
|
|
49
49
|
try:
|
|
50
|
-
uri = self.uri.removesuffix("/press-summary/1")
|
|
51
|
-
|
|
52
|
-
|
|
50
|
+
uri = DocumentURIString(self.uri.removesuffix("/press-summary/1"))
|
|
51
|
+
if not TYPE_CHECKING: # This isn't nice, but will be cleaned up when we refactor how related documents work
|
|
52
|
+
Judgment = importlib.import_module("caselawclient.models.judgments").Judgment
|
|
53
|
+
return Judgment(uri, self.api_client)
|
|
53
54
|
except DocumentNotFoundError:
|
|
54
55
|
return None
|
|
@@ -12,14 +12,14 @@ uk_namespace = {"uk": "https://caselaw.nationalarchives.gov.uk/akn"}
|
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class VersionsDict(TypedDict):
|
|
15
|
-
uri: str
|
|
15
|
+
uri: str ## TODO: This should be either a MarkLogicDocumentURIString (raw from ML) or a DocumentURIString (and we parse it out). Just a str is too vague.
|
|
16
16
|
version: int
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def render_versions(decoded_versions: list[BodyPart]) -> list[VersionsDict]:
|
|
20
20
|
versions: list[VersionsDict] = [
|
|
21
21
|
{
|
|
22
|
-
"uri": part.text.rstrip(".xml"),
|
|
22
|
+
"uri": part.text.strip("/").rstrip(".xml"),
|
|
23
23
|
"version": extract_version(part.text),
|
|
24
24
|
}
|
|
25
25
|
for part in decoded_versions
|
|
@@ -137,12 +137,14 @@ def publish_documents(uri: str) -> None:
|
|
|
137
137
|
response = client.list_objects(Bucket=private_bucket, Prefix=uri)
|
|
138
138
|
|
|
139
139
|
for result in response.get("Contents", []):
|
|
140
|
+
print(f"Contemplating copying {result!r}")
|
|
140
141
|
key = str(result["Key"])
|
|
141
142
|
|
|
142
143
|
if not key.endswith("parser.log") and not key.endswith(".tar.gz"):
|
|
143
144
|
source: CopySourceTypeDef = {"Bucket": private_bucket, "Key": key}
|
|
144
145
|
extra_args: dict[str, str] = {}
|
|
145
146
|
try:
|
|
147
|
+
print(f"Copying {key!r} from {private_bucket!r} to {public_bucket!r}")
|
|
146
148
|
client.copy(source, public_bucket, key, extra_args)
|
|
147
149
|
except botocore.client.ClientError as e:
|
|
148
150
|
logging.warning(
|
|
@@ -9,8 +9,7 @@ def get_xpath_match_string(
|
|
|
9
9
|
namespaces: Optional[Dict[str, str]] = None,
|
|
10
10
|
fallback: str = "",
|
|
11
11
|
) -> str:
|
|
12
|
-
|
|
13
|
-
return str((node.xpath(path, **kwargs) or [fallback])[0])
|
|
12
|
+
return str((node.xpath(path, namespaces=namespaces) or [fallback])[0])
|
|
14
13
|
|
|
15
14
|
|
|
16
15
|
def get_xpath_match_strings(
|
|
@@ -18,5 +17,4 @@ def get_xpath_match_strings(
|
|
|
18
17
|
path: str,
|
|
19
18
|
namespaces: Optional[Dict[str, str]] = None,
|
|
20
19
|
) -> list[str]:
|
|
21
|
-
|
|
22
|
-
return [str(x) for x in node.xpath(path, **kwargs)]
|
|
20
|
+
return [str(x) for x in node.xpath(path, namespaces=namespaces)]
|
|
@@ -14,7 +14,7 @@ let $docTypeQuery := cts:element-attribute-value-query(
|
|
|
14
14
|
)
|
|
15
15
|
let $refQuery := cts:element-query(
|
|
16
16
|
xs:QName("uk:summaryOf"),
|
|
17
|
-
concat("https://caselaw.nationalarchives.gov.uk/id", $parent_uri)
|
|
17
|
+
concat("https://caselaw.nationalarchives.gov.uk/id/", $parent_uri)
|
|
18
18
|
)
|
|
19
19
|
|
|
20
20
|
return xdmp:node-uri(cts:search(//akn:akomaNtoso, cts:and-query(($refQuery, $collectionQuery, $docTypeQuery))))
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
xquery version "1.0-ml";

(:
  Lists documents together with the number of hours since they were last sent for enrichment.

  The inner SELECT joins documents.process_data to documents.process_property_data on uri and
  derives hours_since_enrichment_request from last_sent_to_enrichment. Rows are ordered by that
  value ascending and capped at 1000. xdmp:sql is called with the "array" option and an empty
  bindings map, and its result is passed to xdmp:to-json.
:)
declare namespace xdmp="http://marklogic.com/xdmp";
xdmp:to-json(xdmp:sql(
"SELECT *, process_data.uri, hours_since_enrichment_request, enrich_major_version, enrich_minor_version
FROM (
SELECT
process_data.uri, enrich_major_version, enrich_minor_version,
DATEDIFF('hour', last_sent_to_enrichment, CURRENT_TIMESTAMP) AS hours_since_enrichment_request
FROM documents.process_data
JOIN documents.process_property_data ON process_data.uri = process_property_data.uri
)
ORDER BY hours_since_enrichment_request ASC
LIMIT 1000",
"array",
map:new((
))
))
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
xquery version "1.0-ml";

(:
  Lists documents together with the number of hours since they were last sent to the parser.

  The inner SELECT joins documents.process_data to documents.process_property_data on uri and
  derives hours_since_parse_request from last_sent_to_parser. Rows are ordered by that value
  ascending and capped at 1000. xdmp:sql is called with the "array" option and an empty
  bindings map, and its result is passed to xdmp:to-json.
:)
declare namespace xdmp="http://marklogic.com/xdmp";
xdmp:to-json(xdmp:sql(
"SELECT process_data.uri, hours_since_parse_request, parser_major_version, parser_minor_version
FROM (
SELECT
process_data.uri, parser_major_version, parser_minor_version,
DATEDIFF('hour', last_sent_to_parser, CURRENT_TIMESTAMP) AS hours_since_parse_request
FROM documents.process_data
JOIN documents.process_property_data ON process_data.uri = process_property_data.uri
)
ORDER BY hours_since_parse_request ASC
LIMIT 1000",
"array",
map:new((
))
))
|
|
19
|
+
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
xquery version "1.0-ml";

(:
  Looks up rows of compiled_url_slugs whose identifier_slug equals $identifier_uri.

  $identifier_uri  the slug to look up; bound to the SQL parameter @uri rather than concatenated.
  $published_only  optional integer, defaulting to 1. When its effective boolean value is true the
                   query is additionally restricted to rows where document_published = 'true'.

  Results are returned by xdmp:sql using the "map" option.
:)
declare namespace xdmp="http://marklogic.com/xdmp";
declare variable $identifier_uri as xs:string external;
declare variable $published_only as xs:int? external := 1;

(: Only fixed SQL text is appended here; the caller-supplied slug goes in through the bindings map. :)
let $published_query := if ($published_only) then " AND document_published = 'true'" else ""
let $query := "SELECT * from compiled_url_slugs WHERE (identifier_slug = @uri)" || $published_query

return xdmp:sql(
  $query,
  "map",
  map:new((
    map:entry("uri", $identifier_uri)
  ))
)
|
|
17
|
+
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
xquery version "1.0-ml";

(:
  Sets a DLS document property whose content is XML nodes rather than plain text.

  $uri    the document whose property is set.
  $value  a string of XML, parsed with xdmp:unquote.
  $name   the name of the property element to construct.
:)
import module namespace dls = "http://marklogic.com/xdmp/dls" at "/MarkLogic/dls.xqy";

declare variable $uri as xs:string external;
declare variable $value as xs:string external;
declare variable $name as xs:string external;

(: Build an element named $name holding the child elements of the parsed value's root element,
   so the root element of $value itself is not carried over. :)
let $props := ( element {$name} {xdmp:unquote($value)/*/*} )

return dls:document-set-property($uri, $props)
|
|
@@ -113,6 +113,12 @@ class GetPropertyDict(MarkLogicAPIDict):
|
|
|
113
113
|
uri: MarkLogicDocumentURIString
|
|
114
114
|
|
|
115
115
|
|
|
116
|
+
# get_property_as_node.xqy
|
|
117
|
+
class GetPropertyAsNodeDict(MarkLogicAPIDict):
    """Parameters for the ``get_property_as_node.xqy`` query."""

    name: str
    uri: MarkLogicDocumentURIString
|
|
120
|
+
|
|
121
|
+
|
|
116
122
|
# get_version_annotation.xqy
|
|
117
123
|
class GetVersionAnnotationDict(MarkLogicAPIDict):
|
|
118
124
|
uri: MarkLogicDocumentURIString
|
|
@@ -135,6 +141,12 @@ class ListJudgmentVersionsDict(MarkLogicAPIDict):
|
|
|
135
141
|
uri: MarkLogicDocumentURIString
|
|
136
142
|
|
|
137
143
|
|
|
144
|
+
# resolve_from_identifier.xqy
|
|
145
|
+
class ResolveFromIdentifierDict(MarkLogicAPIDict):
    """Parameters for the ``resolve_from_identifier.xqy`` query."""

    identifier_uri: DocumentURIString
    # NOTE(review): presumably treated as a truthy/falsy flag by the query — confirm against the .xqy file.
    published_only: Optional[int]
|
|
148
|
+
|
|
149
|
+
|
|
138
150
|
# set_boolean_property.xqy
|
|
139
151
|
class SetBooleanPropertyDict(MarkLogicAPIDict):
|
|
140
152
|
name: str
|
|
@@ -187,6 +199,13 @@ class SetPropertyDict(MarkLogicAPIDict):
|
|
|
187
199
|
value: str
|
|
188
200
|
|
|
189
201
|
|
|
202
|
+
# set_property_as_node.xqy
|
|
203
|
+
class SetPropertyAsNodeDict(MarkLogicAPIDict):
    """Parameters for the ``set_property_as_node.xqy`` query."""

    name: str
    uri: MarkLogicDocumentURIString
    value: str
|
|
207
|
+
|
|
208
|
+
|
|
190
209
|
# update_document.xqy
|
|
191
210
|
class UpdateDocumentDict(MarkLogicAPIDict):
|
|
192
211
|
annotation: str
|
{ds_caselaw_marklogic_api_client-27.4.0 → ds_caselaw_marklogic_api_client-28.1.0}/LICENSE.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|