ds-caselaw-marklogic-api-client 40.0.0__tar.gz → 41.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ds-caselaw-marklogic-api-client might be problematic. Click here for more details.
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/PKG-INFO +1 -1
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/pyproject.toml +7 -7
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/Client.py +9 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/factories.py +2 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/documents/__init__.py +39 -2
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/documents/body.py +50 -1
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/documents/xml.py +4 -1
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/identifiers/__init__.py +4 -1
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/identifiers/collection.py +2 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/utilities/aws.py +15 -1
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/types.py +7 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xml_helpers.py +18 -2
- ds_caselaw_marklogic_api_client-41.1.0/src/caselawclient/xquery/check_content_hash_unique_by_uri.xqy +9 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery_type_dicts.py +5 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/LICENSE.md +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/README.md +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/client_helpers/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/client_helpers/search_helpers.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/content_hash.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/errors.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/identifier_resolution.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/documents/comparison.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/documents/exceptions.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/documents/statuses.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/documents/transforms/html.xsl +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/identifiers/exceptions.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/identifiers/fclid.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/identifiers/neutral_citation.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/identifiers/press_summary_ncn.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/identifiers/unpacker.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/judgments.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/neutral_citation_mixin.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/parser_logs.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/press_summaries.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/utilities/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/utilities/dates.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/utilities/move.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/py.typed +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/responses/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/responses/search_response.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/responses/search_result.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/responses/xsl/search_match.xsl +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/search_parameters.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/break_judgment_checkout.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/checkin_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/checkout_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/copy_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/delete_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/document_collections.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/document_exists.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_combined_stats_table.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_components_for_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_highest_enrichment_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_highest_parser_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_judgment_checkout_status.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_judgment_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_last_modified.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_missing_fclid.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_next_document_sequence_number.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_pending_parse_for_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_properties_for_search_results.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_property_as_node.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_recently_enriched.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_recently_parsed.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_version_annotation.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/get_version_created.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/insert_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/list_judgment_versions.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/resolve_from_identifier_slug.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/resolve_from_identifier_value.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/set_boolean_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/set_datetime_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/set_metadata_citation.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/set_metadata_court.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/set_metadata_jurisdiction.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/set_metadata_name.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/set_metadata_this_uri.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/set_metadata_work_expression_date.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/set_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/set_property_as_node.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/update_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/update_locked_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/user_has_privilege.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/user_has_role.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/validate_all_documents.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/validate_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/xslt.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery/xslt_transform.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xslt/modify_xml_live.xsl +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xslt/sample.xsl +0 -0
{ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/pyproject.toml
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "ds-caselaw-marklogic-api-client"
|
|
3
|
-
version = "40.0.0"
|
|
3
|
+
version = "41.1.0"
|
|
4
4
|
description = "An API client for interacting with the underlying data in Find Caselaw."
|
|
5
5
|
authors = ["The National Archives"]
|
|
6
6
|
homepage = "https://github.com/nationalarchives/ds-caselaw-custom-api-client"
|
|
@@ -32,14 +32,14 @@ sqids = "^0.5.0"
|
|
|
32
32
|
defusedxml = "^0.7.1"
|
|
33
33
|
|
|
34
34
|
[tool.poetry.group.dev.dependencies]
|
|
35
|
-
coverage = "7.10.
|
|
36
|
-
pytest = "8.4.
|
|
37
|
-
pytest-cov = "6.
|
|
38
|
-
beautifulsoup4 = "4.13.
|
|
35
|
+
coverage = "7.10.6"
|
|
36
|
+
pytest = "8.4.2"
|
|
37
|
+
pytest-cov = "6.3.0"
|
|
38
|
+
beautifulsoup4 = "4.13.5"
|
|
39
39
|
responses = "0.25.8"
|
|
40
40
|
python-dotenv = "1.1.1"
|
|
41
|
-
time-machine = "2.
|
|
42
|
-
moto = {version = "5.1.
|
|
41
|
+
time-machine = "2.19.0"
|
|
42
|
+
moto = {version = "5.1.11", extras = ["all"]}
|
|
43
43
|
|
|
44
44
|
[tool.poetry.group.docs]
|
|
45
45
|
optional = true
|
|
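{ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/Client.py
RENAMED
|
@@ -34,6 +34,7 @@ from caselawclient.models.utilities import move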
|
|
|
34
34
|
from caselawclient.search_parameters import SearchParameters
|
|
35
35
|
from caselawclient.types import DocumentIdentifierSlug, DocumentIdentifierValue, DocumentURIString
|
|
36
36
|
from caselawclient.xquery_type_dicts import (
|
|
37
|
+
CheckContentHashUniqueByUriDict,
|
|
37
38
|
MarkLogicDocumentURIString,
|
|
38
39
|
MarkLogicDocumentVersionURIString,
|
|
39
40
|
MarkLogicPrivilegeURIString,
|
|
@@ -728,6 +729,14 @@ class MarklogicApiClient:
|
|
|
728
729
|
== 0
|
|
729
730
|
)
|
|
730
731
|
|
|
732
|
+
def has_unique_content_hash(self, judgment_uri: DocumentURIString) -> bool:
|
|
733
|
+
"""
|
|
734
|
+
Returns True if the content hash for this document is unique (not shared with other documents).
|
|
735
|
+
"""
|
|
736
|
+
uri = self._format_uri_for_marklogic(judgment_uri)
|
|
737
|
+
vars: CheckContentHashUniqueByUriDict = {"uri": uri}
|
|
738
|
+
return self._eval_and_decode(vars, "check_content_hash_unique_by_uri.xqy") == "true"
|
|
739
|
+
|
|
731
740
|
def eval(
|
|
732
741
|
self,
|
|
733
742
|
xquery_path: str,
|
|
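{ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/factories.py
RENAMED
|
@@ -61,6 +61,8 @@ class DocumentFactory: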
|
|
|
61
61
|
"source_name": "Example Uploader",
|
|
62
62
|
"source_email": "uploader@example.com",
|
|
63
63
|
"consignment_reference": "TDR-12345",
|
|
64
|
+
"first_published_datetime": None,
|
|
65
|
+
"has_ever_been_published": False,
|
|
64
66
|
"assigned_to": "",
|
|
65
67
|
"versions": [],
|
|
66
68
|
}
|
|
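{ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/documents/__init__.py
RENAMED
|
@@ -95,6 +95,11 @@ class Document: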
|
|
|
95
95
|
True,
|
|
96
96
|
"The court for this {document_noun} is not valid",
|
|
97
97
|
),
|
|
98
|
+
(
|
|
99
|
+
"has_unique_content_hash",
|
|
100
|
+
True,
|
|
101
|
+
"There is another document with identical content",
|
|
102
|
+
),
|
|
98
103
|
]
|
|
99
104
|
"""
|
|
100
105
|
A list of tuples in the form:
|
|
@@ -307,10 +312,37 @@ class Document:
|
|
|
307
312
|
|
|
308
313
|
@cached_property
|
|
309
314
|
def first_published_datetime(self) -> Optional[datetime.datetime]:
|
|
315
|
+
"""
|
|
316
|
+
Return the database value for the date and time this document was first published.
|
|
317
|
+
|
|
318
|
+
:return: The datetime value in the database for "first published".
|
|
319
|
+
"""
|
|
310
320
|
return self.api_client.get_datetime_property(self.uri, "first_published_datetime")
|
|
311
321
|
|
|
322
|
+
@cached_property
|
|
323
|
+
def first_published_datetime_display(self) -> Optional[datetime.datetime]:
|
|
324
|
+
"""
|
|
325
|
+
Return the display value for the date and time this document was first published.
|
|
326
|
+
|
|
327
|
+
A value of 1970-01-01 00:00 indicates that the document has been published previously, but the exact date and time is unknown. In this case, return `None`. This can be used alongside `has_ever_been_published` to indicate an "unknown" state.
|
|
328
|
+
|
|
329
|
+
:return: The datetime value to be displayed to end users for "first published".
|
|
330
|
+
"""
|
|
331
|
+
|
|
332
|
+
if self.first_published_datetime == datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc):
|
|
333
|
+
return None
|
|
334
|
+
|
|
335
|
+
return self.first_published_datetime
|
|
336
|
+
|
|
312
337
|
@cached_property
|
|
313
338
|
def has_ever_been_published(self) -> bool:
|
|
339
|
+
"""
|
|
340
|
+
Do we consider this document to have ever been published?
|
|
341
|
+
|
|
342
|
+
This is `True` if either the document is currently published, or if `first_published_datetime` has any value (including the sentinel value).
|
|
343
|
+
|
|
344
|
+
:return: A boolean indicating if the document has ever been published.
|
|
345
|
+
"""
|
|
314
346
|
return self.is_published or self.first_published_datetime is not None
|
|
315
347
|
|
|
316
348
|
@cached_property
|
|
@@ -325,6 +357,11 @@ class Document:
|
|
|
325
357
|
def annotation(self) -> str:
|
|
326
358
|
return self.api_client.get_version_annotation(self.uri)
|
|
327
359
|
|
|
360
|
+
@cached_property
|
|
361
|
+
def has_unique_content_hash(self) -> bool:
|
|
362
|
+
"""Check if the content hash of this document is unique compared to all other documents in MarkLogic."""
|
|
363
|
+
return self.api_client.has_unique_content_hash(self.uri)
|
|
364
|
+
|
|
328
365
|
@cached_property
|
|
329
366
|
def version_created_datetime(self) -> datetime.datetime:
|
|
330
367
|
return self.api_client.get_version_created_datetime(self.uri)
|
|
@@ -540,14 +577,14 @@ class Document:
|
|
|
540
577
|
"""
|
|
541
578
|
Is it sensible to reparse this document?
|
|
542
579
|
"""
|
|
543
|
-
return self.docx_exists()
|
|
580
|
+
return self.docx_exists() and not self.body.has_external_data
|
|
544
581
|
|
|
545
582
|
@cached_property
|
|
546
583
|
def can_enrich(self) -> bool:
|
|
547
584
|
"""
|
|
548
585
|
Is it possible to enrich this document?
|
|
549
586
|
"""
|
|
550
|
-
return self.body.has_content
|
|
587
|
+
return self.body.has_content and not self.body.has_external_data
|
|
551
588
|
|
|
552
589
|
def validate_identifiers(self) -> SuccessFailureMessageTuple:
|
|
553
590
|
return self.identifiers.perform_all_validations(document_type=type(self), api_client=self.api_client)
|
|
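{ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/documents/body.py
RENAMED
|
@@ -6,9 +6,11 @@ from typing import Optional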
|
|
|
6
6
|
|
|
7
7
|
import pytz
|
|
8
8
|
from ds_caselaw_utils.types import CourtCode
|
|
9
|
+
from lxml import etree
|
|
9
10
|
from saxonche import PySaxonProcessor
|
|
10
11
|
|
|
11
12
|
from caselawclient.models.utilities.dates import parse_string_date_as_utc
|
|
13
|
+
from caselawclient.types import DocumentCategory
|
|
12
14
|
|
|
13
15
|
from .xml import XML
|
|
14
16
|
|
|
@@ -37,6 +39,9 @@ class DocumentBody:
|
|
|
37
39
|
def get_xpath_match_strings(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[str]:
|
|
38
40
|
return self._xml.get_xpath_match_strings(xpath, namespaces)
|
|
39
41
|
|
|
42
|
+
def get_xpath_nodes(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[etree._Element]:
|
|
43
|
+
return self._xml.get_xpath_nodes(xpath, namespaces)
|
|
44
|
+
|
|
40
45
|
@cached_property
|
|
41
46
|
def name(self) -> str:
|
|
42
47
|
return self.get_xpath_match_string(
|
|
@@ -51,9 +56,46 @@ class DocumentBody:
|
|
|
51
56
|
def jurisdiction(self) -> str:
|
|
52
57
|
return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()")
|
|
53
58
|
|
|
59
|
+
@cached_property
|
|
60
|
+
def categories(self) -> list[DocumentCategory]:
|
|
61
|
+
xpath = "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category"
|
|
62
|
+
nodes = self.get_xpath_nodes(xpath, DEFAULT_NAMESPACES)
|
|
63
|
+
|
|
64
|
+
categories: dict[str, DocumentCategory] = {}
|
|
65
|
+
children_map: dict[str, list[DocumentCategory]] = {}
|
|
66
|
+
|
|
67
|
+
for node in nodes:
|
|
68
|
+
name = node.text
|
|
69
|
+
if name is None or not name.strip():
|
|
70
|
+
continue
|
|
71
|
+
|
|
72
|
+
category = DocumentCategory(name=name)
|
|
73
|
+
categories[name] = category
|
|
74
|
+
|
|
75
|
+
parent = node.get("parent")
|
|
76
|
+
|
|
77
|
+
if parent:
|
|
78
|
+
children_map.setdefault(parent, []).append(category)
|
|
79
|
+
|
|
80
|
+
for parent, subcategories in children_map.items():
|
|
81
|
+
if parent in categories:
|
|
82
|
+
categories[parent].subcategories.extend(subcategories)
|
|
83
|
+
|
|
84
|
+
top_level_categories = [
|
|
85
|
+
categories[name]
|
|
86
|
+
for node in nodes
|
|
87
|
+
if node.get("parent") is None
|
|
88
|
+
if (name := node.text) and name in categories
|
|
89
|
+
]
|
|
90
|
+
|
|
91
|
+
return top_level_categories
|
|
92
|
+
|
|
93
|
+
# NOTE: Deprecated - use categories function
|
|
54
94
|
@cached_property
|
|
55
95
|
def category(self) -> Optional[str]:
|
|
56
|
-
return self.get_xpath_match_string(
|
|
96
|
+
return self.get_xpath_match_string(
|
|
97
|
+
"/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category[not(@parent)][1]/text()"
|
|
98
|
+
)
|
|
57
99
|
|
|
58
100
|
@cached_property
|
|
59
101
|
def case_number(self) -> Optional[str]:
|
|
@@ -144,6 +186,13 @@ class DocumentBody:
|
|
|
144
186
|
|
|
145
187
|
return False
|
|
146
188
|
|
|
189
|
+
@cached_property
|
|
190
|
+
def has_external_data(self) -> bool:
|
|
191
|
+
"""Is there data which is not present within the source document:
|
|
192
|
+
is there a spreadsheet which has populated some fields. The current implementation
|
|
193
|
+
"is there a uk:party tag" is intended as a stopgap whilst we're not importing that data."""
|
|
194
|
+
return bool(self._xml.xml_as_tree.xpath("//uk:party", namespaces=DEFAULT_NAMESPACES))
|
|
195
|
+
|
|
147
196
|
@cache
|
|
148
197
|
def content_html(self, image_prefix: str) -> Optional[str]:
|
|
149
198
|
"""Convert the XML representation of the Document into HTML for rendering."""
|
|
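{ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/documents/xml.py
RENAMED
|
@@ -2,7 +2,7 @@ import os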
|
|
|
2
2
|
|
|
3
3
|
from lxml import etree
|
|
4
4
|
|
|
5
|
-
from caselawclient.xml_helpers import get_xpath_match_string, get_xpath_match_strings
|
|
5
|
+
from caselawclient.xml_helpers import get_xpath_match_string, get_xpath_match_strings, get_xpath_nodes
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def _xslt_path(xslt_file_name: str) -> str:
|
|
@@ -50,6 +50,9 @@ class XML:
|
|
|
50
50
|
) -> list[str]:
|
|
51
51
|
return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
|
|
52
52
|
|
|
53
|
+
def get_xpath_nodes(self, xpath: str, namespaces: dict[str, str]) -> list[etree._Element]:
|
|
54
|
+
return get_xpath_nodes(self.xml_as_tree, xpath, namespaces)
|
|
55
|
+
|
|
53
56
|
def _modified(
|
|
54
57
|
self,
|
|
55
58
|
xslt: str,
|
|
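{ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/identifiers/__init__.py
RENAMED
|
@@ -46,7 +46,10 @@ class IdentifierSchema(ABC):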
|
|
|
46
46
|
""" Should editors be allowed to manually manipulate identifiers under this schema? """
|
|
47
47
|
|
|
48
48
|
require_globally_unique: bool = True
|
|
49
|
-
""" Must this identifier be globally unique? """
|
|
49
|
+
""" Must this identifier be globally unique? (appear on no other documents) """
|
|
50
|
+
|
|
51
|
+
allow_multiple: bool = False
|
|
52
|
+
""" May documents have more than one non-deprecated identifier of this type? """
|
|
50
53
|
|
|
51
54
|
document_types: Optional[list[str]] = None
|
|
52
55
|
"""
|
|
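{ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/identifiers/collection.py
RENAMED
|
@@ -43,6 +43,8 @@ class IdentifiersCollection(dict[str, Identifier]):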
|
|
|
43
43
|
"""Check that only one non-deprecated identifier exists per schema where that schema does not allow multiples."""
|
|
44
44
|
|
|
45
45
|
for schema, identifiers in self._list_all_identifiers_by_schema().items():
|
|
46
|
+
if schema.allow_multiple:
|
|
47
|
+
continue
|
|
46
48
|
non_deprecated_identifiers = [i for i in identifiers if not i.deprecated]
|
|
47
49
|
if len(non_deprecated_identifiers) > 1:
|
|
48
50
|
return SuccessFailureMessageTuple(
|
|
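{ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/models/utilities/aws.py
RENAMED
|
@@ -2,6 +2,7 @@ import datetime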
|
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
4
|
import uuid
|
|
5
|
+
from collections.abc import Callable
|
|
5
6
|
from typing import Any, Literal, Optional, TypedDict, overload
|
|
6
7
|
|
|
7
8
|
import boto3
|
|
@@ -118,11 +119,20 @@ def generate_pdf_url(uri: DocumentURIString) -> str:
|
|
|
118
119
|
|
|
119
120
|
|
|
120
121
|
def delete_from_bucket(uri: DocumentURIString, bucket: str) -> None:
|
|
122
|
+
delete_some_from_bucket(uri=uri, bucket=bucket, filter=lambda x: True)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def delete_some_from_bucket(
|
|
126
|
+
uri: DocumentURIString, bucket: str, filter: Callable[[ObjectIdentifierTypeDef], bool]
|
|
127
|
+
) -> None:
|
|
121
128
|
client = create_s3_client()
|
|
122
129
|
response = client.list_objects(Bucket=bucket, Prefix=uri_for_s3(uri))
|
|
123
130
|
|
|
124
131
|
if response.get("Contents"):
|
|
125
|
-
|
|
132
|
+
objects_to_maybe_delete: list[ObjectIdentifierTypeDef] = [
|
|
133
|
+
{"Key": obj["Key"]} for obj in response.get("Contents", [])
|
|
134
|
+
]
|
|
135
|
+
objects_to_delete = [obj for obj in objects_to_maybe_delete if filter(obj)]
|
|
126
136
|
client.delete_objects(
|
|
127
137
|
Bucket=bucket,
|
|
128
138
|
Delete={
|
|
@@ -131,6 +141,10 @@ def delete_from_bucket(uri: DocumentURIString, bucket: str) -> None:
|
|
|
131
141
|
)
|
|
132
142
|
|
|
133
143
|
|
|
144
|
+
def delete_non_targz_from_bucket(uri: DocumentURIString, bucket: str) -> None:
|
|
145
|
+
delete_some_from_bucket(uri=uri, bucket=bucket, filter=lambda x: not x["Key"].endswith(".tar.gz"))
|
|
146
|
+
|
|
147
|
+
|
|
134
148
|
def publish_documents(uri: DocumentURIString) -> None:
|
|
135
149
|
"""
|
|
136
150
|
Copy assets from the unpublished bucket to the published one.
|
|
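{ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/types.py
RENAMED
|
@@ -1,6 +1,13 @@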
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
1
2
|
from typing import NamedTuple
|
|
2
3
|
|
|
3
4
|
|
|
5
|
+
@dataclass
|
|
6
|
+
class DocumentCategory:
|
|
7
|
+
name: str
|
|
8
|
+
subcategories: list["DocumentCategory"] = field(default_factory=list)
|
|
9
|
+
|
|
10
|
+
|
|
4
11
|
class InvalidDocumentURIException(Exception):
|
|
5
12
|
"""The document URI is not valid."""
|
|
6
13
|
|
|
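{ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xml_helpers.py
RENAMED
|
@@ -7,9 +7,25 @@ DEFAULT_NAMESPACES = {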
|
|
|
7
7
|
"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
|
|
8
8
|
}
|
|
9
9
|
|
|
10
|
+
# _Element is the only class lxml exposes, so need to use the private class for typing
|
|
11
|
+
Element = etree._Element # noqa: SLF001
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_xpath_nodes(
|
|
15
|
+
node: Element,
|
|
16
|
+
path: str,
|
|
17
|
+
namespaces: Optional[Dict[str, str]] = None,
|
|
18
|
+
) -> list[Element]:
|
|
19
|
+
result = node.xpath(path, namespaces=namespaces)
|
|
20
|
+
|
|
21
|
+
if not isinstance(result, list) or not all(isinstance(x, Element) for x in result):
|
|
22
|
+
raise TypeError(f"Expected to return list[Element], got {type(result).__name__}")
|
|
23
|
+
|
|
24
|
+
return result
|
|
25
|
+
|
|
10
26
|
|
|
11
27
|
def get_xpath_match_string(
|
|
12
|
-
node:
|
|
28
|
+
node: Element,
|
|
13
29
|
path: str,
|
|
14
30
|
namespaces: Optional[Dict[str, str]] = None,
|
|
15
31
|
fallback: str = "",
|
|
@@ -18,7 +34,7 @@ def get_xpath_match_string(
|
|
|
18
34
|
|
|
19
35
|
|
|
20
36
|
def get_xpath_match_strings(
|
|
21
|
-
node:
|
|
37
|
+
node: Element,
|
|
22
38
|
path: str,
|
|
23
39
|
namespaces: Optional[Dict[str, str]] = None,
|
|
24
40
|
) -> list[str]:
|
ds_caselaw_marklogic_api_client-41.1.0/src/caselawclient/xquery/check_content_hash_unique_by_uri.xqy
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
xquery version "1.0-ml";
|
|
2
|
+
declare namespace akn = "http://docs.oasis-open.org/legaldocml/ns/akn/3.0";
|
|
3
|
+
declare namespace uk = "https://caselaw.nationalarchives.gov.uk/akn";
|
|
4
|
+
declare variable $uri as xs:string external;
|
|
5
|
+
|
|
6
|
+
let $doc := doc($uri)
|
|
7
|
+
let $hash := $doc//uk:hash/text()
|
|
8
|
+
let $count := count(cts:search(fn:doc(), cts:element-value-query(xs:QName("uk:hash"), $hash)))
|
|
9
|
+
return $count = 1
|
|
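{ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/src/caselawclient/xquery_type_dicts.py
RENAMED
|
@@ -23,6 +23,11 @@ class BreakJudgmentCheckoutDict(MarkLogicAPIDict):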
|
|
|
23
23
|
uri: MarkLogicDocumentURIString
|
|
24
24
|
|
|
25
25
|
|
|
26
|
+
# check_content_hash_unique_by_uri.xqy
|
|
27
|
+
class CheckContentHashUniqueByUriDict(MarkLogicAPIDict):
|
|
28
|
+
uri: MarkLogicDocumentURIString
|
|
29
|
+
|
|
30
|
+
|
|
26
31
|
# checkin_judgment.xqy
|
|
27
32
|
class CheckinJudgmentDict(MarkLogicAPIDict):
|
|
28
33
|
uri: MarkLogicDocumentURIString
|
{ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.1.0}/LICENSE.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|