ds-caselaw-marklogic-api-client 37.4.0__tar.gz → 39.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ds-caselaw-marklogic-api-client might be problematic. Click here for more details.
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/PKG-INFO +2 -2
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/pyproject.toml +6 -6
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/factories.py +7 -1
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/documents/__init__.py +49 -11
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/documents/body.py +3 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/documents/exceptions.py +4 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/documents/xml.py +26 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/identifiers/__init__.py +66 -58
- ds_caselaw_marklogic_api_client-39.0.0/src/caselawclient/models/identifiers/collection.py +170 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/identifiers/exceptions.py +0 -4
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/identifiers/fclid.py +4 -1
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/identifiers/neutral_citation.py +3 -1
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/identifiers/press_summary_ncn.py +2 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/identifiers/unpacker.py +5 -9
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/responses/search_result.py +2 -2
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/types.py +11 -0
- ds_caselaw_marklogic_api_client-39.0.0/src/caselawclient/xslt/modify_xml_live.xsl +70 -0
- ds_caselaw_marklogic_api_client-39.0.0/src/caselawclient/xslt/sample.xsl +26 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/LICENSE.md +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/README.md +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/Client.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/client_helpers/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/client_helpers/search_helpers.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/content_hash.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/errors.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/identifier_resolution.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/documents/statuses.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/documents/transforms/html.xsl +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/judgments.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/neutral_citation_mixin.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/parser_logs.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/press_summaries.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/utilities/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/utilities/aws.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/utilities/dates.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/models/utilities/move.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/py.typed +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/responses/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/responses/search_response.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/responses/xsl/search_match.xsl +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/search_parameters.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xml_helpers.py +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/break_judgment_checkout.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/checkin_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/checkout_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/copy_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/delete_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/document_collections.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/document_exists.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_combined_stats_table.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_components_for_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_highest_enrichment_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_highest_parser_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_judgment_checkout_status.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_judgment_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_last_modified.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_missing_fclid.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_next_document_sequence_number.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_pending_parse_for_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_properties_for_search_results.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_property_as_node.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_recently_enriched.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_recently_parsed.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_version_annotation.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/get_version_created.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/insert_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/list_judgment_versions.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/resolve_from_identifier_slug.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/resolve_from_identifier_value.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/set_boolean_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/set_metadata_citation.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/set_metadata_court.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/set_metadata_jurisdiction.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/set_metadata_name.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/set_metadata_this_uri.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/set_metadata_work_expression_date.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/set_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/set_property_as_node.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/update_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/update_locked_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/user_has_privilege.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/user_has_role.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/validate_all_documents.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/validate_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/xslt.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery/xslt_transform.xqy +0 -0
- {ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/src/caselawclient/xquery_type_dicts.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ds-caselaw-marklogic-api-client
|
|
3
|
-
Version:
|
|
3
|
+
Version: 39.0.0
|
|
4
4
|
Summary: An API client for interacting with the underlying data in Find Caselaw.
|
|
5
5
|
Keywords: national archives,caselaw
|
|
6
6
|
Author: The National Archives
|
|
@@ -11,7 +11,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.12
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.13
|
|
13
13
|
Requires-Dist: boto3 (>=1.26.112,<2.0.0)
|
|
14
|
-
Requires-Dist: certifi (>=2025.
|
|
14
|
+
Requires-Dist: certifi (>=2025.7.9,<2025.8.0)
|
|
15
15
|
Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
|
|
16
16
|
Requires-Dist: django-environ (>=0.12.0)
|
|
17
17
|
Requires-Dist: ds-caselaw-utils (>=2.0.0,<3.0.0)
|
{ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/pyproject.toml
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "ds-caselaw-marklogic-api-client"
|
|
3
|
-
version = "
|
|
3
|
+
version = "39.0.0"
|
|
4
4
|
description = "An API client for interacting with the underlying data in Find Caselaw."
|
|
5
5
|
authors = ["The National Archives"]
|
|
6
6
|
homepage = "https://github.com/nationalarchives/ds-caselaw-custom-api-client"
|
|
@@ -12,7 +12,7 @@ packages = [
|
|
|
12
12
|
|
|
13
13
|
[tool.poetry.dependencies]
|
|
14
14
|
python = "^3.10.0"
|
|
15
|
-
certifi = ">=2025.
|
|
15
|
+
certifi = ">=2025.7.9,<2025.8.0"
|
|
16
16
|
charset-normalizer = "^3.0.0"
|
|
17
17
|
django-environ = ">=0.12.0"
|
|
18
18
|
idna = "^3.4"
|
|
@@ -31,13 +31,13 @@ saxonche = "^12.5.0"
|
|
|
31
31
|
sqids = "^0.5.0"
|
|
32
32
|
|
|
33
33
|
[tool.poetry.group.dev.dependencies]
|
|
34
|
-
coverage = "7.
|
|
35
|
-
pytest = "8.
|
|
34
|
+
coverage = "7.9.2"
|
|
35
|
+
pytest = "8.4.1"
|
|
36
36
|
beautifulsoup4 = "4.13.4"
|
|
37
37
|
responses = "0.25.7"
|
|
38
|
-
python-dotenv = "1.1.
|
|
38
|
+
python-dotenv = "1.1.1"
|
|
39
39
|
time-machine = "2.16.0"
|
|
40
|
-
moto = {version = "5.1.
|
|
40
|
+
moto = {version = "5.1.8", extras = ["all"]}
|
|
41
41
|
|
|
42
42
|
[tool.poetry.group.docs]
|
|
43
43
|
optional = true
|
|
@@ -8,6 +8,7 @@ from caselawclient.identifier_resolution import IdentifierResolution, Identifier
|
|
|
8
8
|
from caselawclient.models.documents import Document
|
|
9
9
|
from caselawclient.models.documents.body import DocumentBody
|
|
10
10
|
from caselawclient.models.identifiers import Identifier
|
|
11
|
+
from caselawclient.models.identifiers.collection import IdentifiersCollection
|
|
11
12
|
from caselawclient.models.identifiers.fclid import FindCaseLawIdentifier
|
|
12
13
|
from caselawclient.models.identifiers.neutral_citation import NeutralCitationNumber
|
|
13
14
|
from caselawclient.models.judgments import Judgment
|
|
@@ -188,5 +189,10 @@ class SearchResultFactory(SimpleFactory[SearchResult]):
|
|
|
188
189
|
"matches": None,
|
|
189
190
|
"slug": "uksc/2025/1",
|
|
190
191
|
"content_hash": "ed7002b439e9ac845f22357d822bac1444730fbdb6016d3ec9432297b9ec9f73",
|
|
191
|
-
"identifiers":
|
|
192
|
+
"identifiers": IdentifiersCollection(
|
|
193
|
+
{
|
|
194
|
+
"id-1": NeutralCitationNumber("[2025] UKSC 123", "id-1"),
|
|
195
|
+
"id-2": FindCaseLawIdentifier("bcdfghjk", "id-2"),
|
|
196
|
+
}
|
|
197
|
+
),
|
|
192
198
|
}
|
|
@@ -16,6 +16,7 @@ from caselawclient.errors import (
|
|
|
16
16
|
)
|
|
17
17
|
from caselawclient.identifier_resolution import IdentifierResolutions
|
|
18
18
|
from caselawclient.models.identifiers import Identifier
|
|
19
|
+
from caselawclient.models.identifiers.exceptions import IdentifierValidationException
|
|
19
20
|
from caselawclient.models.identifiers.fclid import FindCaseLawIdentifier, FindCaseLawIdentifierSchema
|
|
20
21
|
from caselawclient.models.identifiers.unpacker import unpack_all_identifiers_from_etree
|
|
21
22
|
from caselawclient.models.utilities import VersionsDict, extract_version, render_versions
|
|
@@ -30,10 +31,10 @@ from caselawclient.models.utilities.aws import (
|
|
|
30
31
|
request_parse,
|
|
31
32
|
unpublish_documents,
|
|
32
33
|
)
|
|
33
|
-
from caselawclient.types import DocumentURIString
|
|
34
|
+
from caselawclient.types import DocumentURIString, SuccessFailureMessageTuple
|
|
34
35
|
|
|
35
36
|
from .body import DocumentBody
|
|
36
|
-
from .exceptions import CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion
|
|
37
|
+
from .exceptions import CannotEnrichUnenrichableDocument, CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion
|
|
37
38
|
from .statuses import DOCUMENT_STATUS_HOLD, DOCUMENT_STATUS_IN_PROGRESS, DOCUMENT_STATUS_NEW, DOCUMENT_STATUS_PUBLISHED
|
|
38
39
|
|
|
39
40
|
MINIMUM_ENRICHMENT_TIME = datetime.timedelta(minutes=20)
|
|
@@ -340,22 +341,34 @@ class Document:
|
|
|
340
341
|
now.isoformat(),
|
|
341
342
|
)
|
|
342
343
|
|
|
344
|
+
if not self.can_enrich:
|
|
345
|
+
msg = f"{self.uri} cannot be enriched"
|
|
346
|
+
raise CannotEnrichUnenrichableDocument(msg)
|
|
347
|
+
|
|
343
348
|
announce_document_event(
|
|
344
349
|
uri=self.uri,
|
|
345
350
|
status="enrich",
|
|
346
351
|
enrich=True,
|
|
347
352
|
)
|
|
348
353
|
|
|
349
|
-
def enrich(self) -> bool:
|
|
354
|
+
def enrich(self, even_if_recent: bool = False, accept_failures: bool = False) -> bool:
|
|
350
355
|
"""
|
|
351
356
|
Request enrichment of a document, if it's sensible to do so.
|
|
352
357
|
"""
|
|
353
|
-
if self.enriched_recently
|
|
354
|
-
print("Enrichment requested")
|
|
358
|
+
if not (even_if_recent) and self.enriched_recently:
|
|
359
|
+
print("Enrichment not requested as document was enriched recently")
|
|
360
|
+
return False
|
|
361
|
+
|
|
362
|
+
print("Enrichment requested")
|
|
363
|
+
|
|
364
|
+
try:
|
|
355
365
|
self.force_enrich()
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
366
|
+
except CannotEnrichUnenrichableDocument as e:
|
|
367
|
+
if not accept_failures:
|
|
368
|
+
raise e
|
|
369
|
+
return False
|
|
370
|
+
|
|
371
|
+
return True
|
|
359
372
|
|
|
360
373
|
@cached_property
|
|
361
374
|
def enriched_recently(self) -> bool:
|
|
@@ -501,10 +514,25 @@ class Document:
|
|
|
501
514
|
"""
|
|
502
515
|
return self.docx_exists()
|
|
503
516
|
|
|
517
|
+
@cached_property
|
|
518
|
+
def can_enrich(self) -> bool:
|
|
519
|
+
"""
|
|
520
|
+
Is it possible to enrich this document?
|
|
521
|
+
"""
|
|
522
|
+
return self.body.has_content
|
|
523
|
+
|
|
524
|
+
def validate_identifiers(self) -> SuccessFailureMessageTuple:
|
|
525
|
+
return self.identifiers.perform_all_validations(document_type=type(self), api_client=self.api_client)
|
|
526
|
+
|
|
504
527
|
def save_identifiers(self) -> None:
|
|
505
|
-
"""
|
|
506
|
-
self.
|
|
507
|
-
|
|
528
|
+
"""Validate the identifiers, and if the validation passes save them to MarkLogic"""
|
|
529
|
+
validations = self.validate_identifiers()
|
|
530
|
+
if validations.success is True:
|
|
531
|
+
self.api_client.set_property_as_node(self.uri, "identifiers", self.identifiers.as_etree)
|
|
532
|
+
else:
|
|
533
|
+
raise IdentifierValidationException(
|
|
534
|
+
"Unable to save identifiers; validation constraints not met: " + ", ".join(validations.messages)
|
|
535
|
+
)
|
|
508
536
|
|
|
509
537
|
def __getattr__(self, name: str) -> Any:
|
|
510
538
|
warnings.warn(f"{name} no longer exists on Document, using Document.body instead", DeprecationWarning)
|
|
@@ -541,3 +569,13 @@ class Document:
|
|
|
541
569
|
def content_as_html(self) -> str | None:
|
|
542
570
|
xlst_image_location = os.getenv("XSLT_IMAGE_LOCATION", "")
|
|
543
571
|
return self.body.content_html(f"{xlst_image_location}/{self.uri}")
|
|
572
|
+
|
|
573
|
+
def xml_with_correct_frbr(self) -> bytes:
|
|
574
|
+
"""Dynamically modify FRBR uris to reflect current storage location and FCL id"""
|
|
575
|
+
fcl_identifiers = self.identifiers.of_type(FindCaseLawIdentifier)
|
|
576
|
+
work_uri = f"https://caselaw.nationalarchives.gov.uk/id/doc/{fcl_identifiers[0].value}"
|
|
577
|
+
expression_uri = f"https://caselaw.nationalarchives.gov.uk/{self.uri.lstrip('/')}"
|
|
578
|
+
manifestation_uri = f"https://caselaw.nationalarchives.gov.uk/{self.uri.lstrip('/')}/data.xml"
|
|
579
|
+
return self.body.apply_xslt(
|
|
580
|
+
"modify_xml_live.xsl", work_uri=work_uri, expression_uri=expression_uri, manifestation_uri=manifestation_uri
|
|
581
|
+
)
|
|
@@ -174,3 +174,6 @@ class DocumentBody:
|
|
|
174
174
|
:return: `True` if there was a complete parser failure, otherwise `False`
|
|
175
175
|
"""
|
|
176
176
|
return "error" in self._xml.root_element
|
|
177
|
+
|
|
178
|
+
def apply_xslt(self, xslt_filename: str, **values: str) -> bytes:
|
|
179
|
+
return self._xml.apply_xslt(xslt_filename, **values)
|
|
@@ -2,5 +2,9 @@ class CannotPublishUnpublishableDocument(Exception):
|
|
|
2
2
|
"""A document which has failed publication safety checks in `Document.is_publishable` cannot be published."""
|
|
3
3
|
|
|
4
4
|
|
|
5
|
+
class CannotEnrichUnenrichableDocument(Exception):
|
|
6
|
+
"""A document which cannot be enriched (see `Document.can_enrich`) tried to be sent to enrichment"""
|
|
7
|
+
|
|
8
|
+
|
|
5
9
|
class DocumentNotSafeForDeletion(Exception):
|
|
6
10
|
"""A document which is not safe for deletion cannot be deleted."""
|
|
@@ -1,8 +1,16 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
1
3
|
from lxml import etree
|
|
2
4
|
|
|
3
5
|
from caselawclient.xml_helpers import get_xpath_match_string, get_xpath_match_strings
|
|
4
6
|
|
|
5
7
|
|
|
8
|
+
def _xslt_path(xslt_file_name: str) -> str:
|
|
9
|
+
from caselawclient.Client import ROOT_DIR
|
|
10
|
+
|
|
11
|
+
return os.path.join(ROOT_DIR, "xslt", xslt_file_name)
|
|
12
|
+
|
|
13
|
+
|
|
6
14
|
class NonXMLDocumentError(Exception):
|
|
7
15
|
"""A document cannot be parsed as XML."""
|
|
8
16
|
|
|
@@ -41,3 +49,21 @@ class XML:
|
|
|
41
49
|
namespaces: dict[str, str],
|
|
42
50
|
) -> list[str]:
|
|
43
51
|
return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
|
|
52
|
+
|
|
53
|
+
def _modified(
|
|
54
|
+
self,
|
|
55
|
+
xslt: str,
|
|
56
|
+
**values: str,
|
|
57
|
+
) -> bytes:
|
|
58
|
+
"""XSLT transform this XML, given a stylesheet"""
|
|
59
|
+
passable_values = {k: etree.XSLT.strparam(v) for k, v in values.items()}
|
|
60
|
+
xslt_transform = etree.XSLT(etree.fromstring(xslt))
|
|
61
|
+
noncanonical_xml = xslt_transform(self.xml_as_tree, profile_run=False, **passable_values)
|
|
62
|
+
return etree.tostring(noncanonical_xml, method="c14n2")
|
|
63
|
+
|
|
64
|
+
def apply_xslt(self, xslt_filename: str, **values: str) -> bytes:
|
|
65
|
+
"""XSLT transform this XML, given a path to a stylesheet"""
|
|
66
|
+
full_xslt_filename = _xslt_path(xslt_filename)
|
|
67
|
+
with open(full_xslt_filename) as f:
|
|
68
|
+
xslt = f.read()
|
|
69
|
+
return self._modified(xslt, **values)
|
|
@@ -1,12 +1,16 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
from typing import Any, Optional
|
|
2
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
3
3
|
from uuid import uuid4
|
|
4
4
|
|
|
5
5
|
from lxml import etree
|
|
6
6
|
|
|
7
|
-
from caselawclient.types import DocumentIdentifierSlug, DocumentIdentifierValue
|
|
7
|
+
from caselawclient.types import DocumentIdentifierSlug, DocumentIdentifierValue, SuccessFailureMessageTuple
|
|
8
8
|
|
|
9
|
-
from .exceptions import IdentifierValidationException
|
|
9
|
+
from .exceptions import IdentifierValidationException
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from caselawclient.Client import MarklogicApiClient
|
|
13
|
+
from caselawclient.models.documents import Document
|
|
10
14
|
|
|
11
15
|
IDENTIFIER_PACKABLE_ATTRIBUTES: list[str] = [
|
|
12
16
|
"uuid",
|
|
@@ -38,6 +42,19 @@ class IdentifierSchema(ABC):
|
|
|
38
42
|
base_score_multiplier: float = 1.0
|
|
39
43
|
""" A multiplier used to adjust the relative ranking of this identifier when calculating preferred identifiers. """
|
|
40
44
|
|
|
45
|
+
allow_editing: bool = True
|
|
46
|
+
""" Should editors be allowed to manually manipulate identifiers under this schema? """
|
|
47
|
+
|
|
48
|
+
require_globally_unique: bool = True
|
|
49
|
+
""" Must this identifier be globally unique? """
|
|
50
|
+
|
|
51
|
+
document_types: Optional[list[str]] = None
|
|
52
|
+
"""
|
|
53
|
+
If present, a list of the names of document classes which can have this identifier.
|
|
54
|
+
|
|
55
|
+
If `None`, this identifier is valid for all document types.
|
|
56
|
+
"""
|
|
57
|
+
|
|
41
58
|
def __init_subclass__(cls: type["IdentifierSchema"], **kwargs: Any) -> None:
|
|
42
59
|
"""Ensure that subclasses have the required attributes set."""
|
|
43
60
|
for required in (
|
|
@@ -54,7 +71,7 @@ class IdentifierSchema(ABC):
|
|
|
54
71
|
|
|
55
72
|
@classmethod
|
|
56
73
|
@abstractmethod
|
|
57
|
-
def
|
|
74
|
+
def validate_identifier_value(cls, value: str) -> bool:
|
|
58
75
|
"""Check that any given identifier value is valid for this schema."""
|
|
59
76
|
pass
|
|
60
77
|
|
|
@@ -94,7 +111,7 @@ class Identifier(ABC):
|
|
|
94
111
|
return self.value
|
|
95
112
|
|
|
96
113
|
def __init__(self, value: str, uuid: Optional[str] = None, deprecated: bool = False) -> None:
|
|
97
|
-
if not self.schema.
|
|
114
|
+
if not self.schema.validate_identifier_value(value=value):
|
|
98
115
|
raise IdentifierValidationException(
|
|
99
116
|
f'Identifier value "{value}" is not valid according to the {self.schema.name} schema.'
|
|
100
117
|
)
|
|
@@ -132,72 +149,63 @@ class Identifier(ABC):
|
|
|
132
149
|
@property
|
|
133
150
|
def score(self) -> float:
|
|
134
151
|
"""Return the score of this identifier, used to calculate the preferred identifier for a document."""
|
|
152
|
+
if self.deprecated:
|
|
153
|
+
return 0
|
|
154
|
+
|
|
135
155
|
return 1 * self.schema.base_score_multiplier
|
|
136
156
|
|
|
137
157
|
def same_as(self, other: "Identifier") -> bool:
|
|
138
158
|
"Is this the same as another identifier (in value and schema)?"
|
|
139
159
|
return self.value == other.value and self.schema == other.schema
|
|
140
160
|
|
|
161
|
+
def validate_require_globally_unique(self, api_client: "MarklogicApiClient") -> SuccessFailureMessageTuple:
|
|
162
|
+
"""
|
|
163
|
+
Check against the list of identifiers in the database that this value does not currently exist.
|
|
141
164
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
if not self.contains(identifier):
|
|
155
|
-
self[identifier.uuid] = identifier
|
|
156
|
-
|
|
157
|
-
def __delitem__(self, key: Union[Identifier, str]) -> None:
|
|
158
|
-
if isinstance(key, Identifier):
|
|
159
|
-
super().__delitem__(key.uuid)
|
|
160
|
-
else:
|
|
161
|
-
super().__delitem__(key)
|
|
165
|
+
nb: We don't need to check that the identifier value is unique within a parent `Identifiers` object, because `Identifiers.add()` will only allow one value per namespace.
|
|
166
|
+
"""
|
|
167
|
+
resolutions = [
|
|
168
|
+
resolution
|
|
169
|
+
for resolution in api_client.resolve_from_identifier_value(identifier_value=self.value)
|
|
170
|
+
if resolution.identifier_namespace == self.schema.namespace
|
|
171
|
+
]
|
|
172
|
+
if len(resolutions) > 0:
|
|
173
|
+
return SuccessFailureMessageTuple(
|
|
174
|
+
False,
|
|
175
|
+
[f'Identifiers in scheme "{self.schema.namespace}" must be unique; "{self.value}" already exists!'],
|
|
176
|
+
)
|
|
162
177
|
|
|
163
|
-
|
|
164
|
-
"""Return a list of all identifiers of a given type."""
|
|
165
|
-
uuids = self.keys()
|
|
166
|
-
return [self[uuid] for uuid in list(uuids) if isinstance(self[uuid], identifier_type)]
|
|
178
|
+
return SuccessFailureMessageTuple(True, [])
|
|
167
179
|
|
|
168
|
-
def
|
|
169
|
-
|
|
170
|
-
uuids = self.keys()
|
|
171
|
-
for uuid in list(uuids):
|
|
172
|
-
# we could use compare to .schema instead, which would have diffferent behaviour for subclasses
|
|
173
|
-
if isinstance(self[uuid], deleted_identifier_type):
|
|
174
|
-
del self[uuid]
|
|
180
|
+
def validate_valid_for_document_type(self, document_type: type["Document"]) -> SuccessFailureMessageTuple:
|
|
181
|
+
document_type_classname = document_type.__name__
|
|
175
182
|
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
183
|
+
if self.schema.document_types and document_type_classname not in self.schema.document_types:
|
|
184
|
+
return SuccessFailureMessageTuple(
|
|
185
|
+
False,
|
|
186
|
+
[
|
|
187
|
+
f'Document type "{document_type_classname}" is not accepted for identifier schema "{self.schema.name}"'
|
|
188
|
+
],
|
|
189
|
+
)
|
|
180
190
|
|
|
181
|
-
|
|
182
|
-
identifiers_root.append(identifier.as_xml_tree)
|
|
191
|
+
return SuccessFailureMessageTuple(True, [])
|
|
183
192
|
|
|
184
|
-
|
|
193
|
+
def perform_all_validations(
|
|
194
|
+
self, document_type: type["Document"], api_client: "MarklogicApiClient"
|
|
195
|
+
) -> SuccessFailureMessageTuple:
|
|
196
|
+
"""Perform all validations on a given identifier"""
|
|
197
|
+
validations = [
|
|
198
|
+
self.validate_require_globally_unique(api_client=api_client),
|
|
199
|
+
self.validate_valid_for_document_type(document_type=document_type),
|
|
200
|
+
]
|
|
185
201
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
:param type: Optionally, an identifier type to constrain this list to.
|
|
202
|
+
success = True
|
|
203
|
+
messages: list[str] = []
|
|
189
204
|
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
return sorted(identifiers, key=lambda v: v.score, reverse=True)
|
|
205
|
+
for validation in validations:
|
|
206
|
+
if validation.success is False:
|
|
207
|
+
success = False
|
|
194
208
|
|
|
195
|
-
|
|
196
|
-
"""
|
|
197
|
-
:param type: Optionally, an identifier type to constrain the results to.
|
|
209
|
+
messages += validation.messages
|
|
198
210
|
|
|
199
|
-
|
|
200
|
-
"""
|
|
201
|
-
if len(self.by_score(type)) == 0:
|
|
202
|
-
return None
|
|
203
|
-
return self.by_score(type)[0]
|
|
211
|
+
return SuccessFailureMessageTuple(success, messages)
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
from typing import TYPE_CHECKING, Optional, Union
|
|
2
|
+
|
|
3
|
+
from lxml import etree
|
|
4
|
+
|
|
5
|
+
from caselawclient.types import SuccessFailureMessageTuple
|
|
6
|
+
|
|
7
|
+
from . import Identifier, IdentifierSchema
|
|
8
|
+
from .fclid import FindCaseLawIdentifier
|
|
9
|
+
from .neutral_citation import NeutralCitationNumber
|
|
10
|
+
from .press_summary_ncn import PressSummaryRelatedNCNIdentifier
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from caselawclient.Client import MarklogicApiClient
|
|
14
|
+
from caselawclient.models.documents import Document
|
|
15
|
+
|
|
16
|
+
SUPPORTED_IDENTIFIER_TYPES: list[type["Identifier"]] = [
|
|
17
|
+
FindCaseLawIdentifier,
|
|
18
|
+
NeutralCitationNumber,
|
|
19
|
+
PressSummaryRelatedNCNIdentifier,
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class IdentifiersCollection(dict[str, Identifier]):
|
|
24
|
+
def validate_uuids_match_keys(self) -> SuccessFailureMessageTuple:
|
|
25
|
+
for uuid, identifier in self.items():
|
|
26
|
+
if uuid != identifier.uuid:
|
|
27
|
+
return SuccessFailureMessageTuple(
|
|
28
|
+
False, [f"Key of {identifier} in Identifiers is {uuid} not {identifier.uuid}"]
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
return SuccessFailureMessageTuple(True, [])
|
|
32
|
+
|
|
33
|
+
def _list_all_identifiers_by_schema(self) -> dict[type[IdentifierSchema], list[Identifier]]:
|
|
34
|
+
"""Get a list of all identifiers, grouped by their schema."""
|
|
35
|
+
identifiers_by_schema: dict[type[IdentifierSchema], list[Identifier]] = {}
|
|
36
|
+
|
|
37
|
+
for identifier in self.values():
|
|
38
|
+
identifiers_by_schema.setdefault(identifier.schema, []).append(identifier)
|
|
39
|
+
|
|
40
|
+
return identifiers_by_schema
|
|
41
|
+
|
|
42
|
+
def check_only_single_non_deprecated_identifier_where_multiples_not_allowed(self) -> SuccessFailureMessageTuple:
|
|
43
|
+
"""Check that only one non-deprecated identifier exists per schema where that schema does not allow multiples."""
|
|
44
|
+
|
|
45
|
+
for schema, identifiers in self._list_all_identifiers_by_schema().items():
|
|
46
|
+
non_deprecated_identifiers = [i for i in identifiers if not i.deprecated]
|
|
47
|
+
if len(non_deprecated_identifiers) > 1:
|
|
48
|
+
return SuccessFailureMessageTuple(
|
|
49
|
+
False,
|
|
50
|
+
[
|
|
51
|
+
f"Multiple non-deprecated identifiers found for schema '{schema.name}': {', '.join(i.value for i in non_deprecated_identifiers)}"
|
|
52
|
+
],
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
return SuccessFailureMessageTuple(True, [])
|
|
56
|
+
|
|
57
|
+
def _perform_collection_level_validations(self) -> SuccessFailureMessageTuple:
|
|
58
|
+
"""Perform identifier validations which are only possible at the collection level, such as UUID integrity and identifying exclusivity problems."""
|
|
59
|
+
|
|
60
|
+
success = True
|
|
61
|
+
messages: list[str] = []
|
|
62
|
+
|
|
63
|
+
collection_validations_to_run: list[SuccessFailureMessageTuple] = [
|
|
64
|
+
self.validate_uuids_match_keys(),
|
|
65
|
+
self.check_only_single_non_deprecated_identifier_where_multiples_not_allowed(),
|
|
66
|
+
]
|
|
67
|
+
|
|
68
|
+
for validation in collection_validations_to_run:
|
|
69
|
+
if not validation.success:
|
|
70
|
+
success = False
|
|
71
|
+
messages += validation.messages
|
|
72
|
+
|
|
73
|
+
return SuccessFailureMessageTuple(success, messages)
|
|
74
|
+
|
|
75
|
+
def _perform_identifier_level_validations(
|
|
76
|
+
self, document_type: type["Document"], api_client: "MarklogicApiClient"
|
|
77
|
+
) -> SuccessFailureMessageTuple:
|
|
78
|
+
"""Perform identifier validations at the individual identifier level."""
|
|
79
|
+
|
|
80
|
+
success = True
|
|
81
|
+
messages: list[str] = []
|
|
82
|
+
|
|
83
|
+
for _, identifier in self.items():
|
|
84
|
+
validations = identifier.perform_all_validations(document_type=document_type, api_client=api_client)
|
|
85
|
+
if validations.success is False:
|
|
86
|
+
success = False
|
|
87
|
+
|
|
88
|
+
messages += validations.messages
|
|
89
|
+
|
|
90
|
+
return SuccessFailureMessageTuple(success, messages)
|
|
91
|
+
|
|
92
|
+
def perform_all_validations(
|
|
93
|
+
self, document_type: type["Document"], api_client: "MarklogicApiClient"
|
|
94
|
+
) -> SuccessFailureMessageTuple:
|
|
95
|
+
"""Perform all possible identifier validations on this collection, both at the individual and collection level."""
|
|
96
|
+
|
|
97
|
+
identifier_level_success, identifier_level_messages = self._perform_identifier_level_validations(
|
|
98
|
+
document_type=document_type, api_client=api_client
|
|
99
|
+
)
|
|
100
|
+
collection_level_success, collection_level_messages = self._perform_collection_level_validations()
|
|
101
|
+
|
|
102
|
+
success = all([identifier_level_success, collection_level_success])
|
|
103
|
+
all_messages = identifier_level_messages + collection_level_messages
|
|
104
|
+
|
|
105
|
+
return SuccessFailureMessageTuple(success, all_messages)
|
|
106
|
+
|
|
107
|
+
def contains(self, other_identifier: Identifier) -> bool:
|
|
108
|
+
"""Does the identifier's value and namespace already exist in this group?"""
|
|
109
|
+
return any(other_identifier.same_as(identifier) for identifier in self.values())
|
|
110
|
+
|
|
111
|
+
def add(self, identifier: Identifier) -> None:
|
|
112
|
+
if not self.contains(identifier):
|
|
113
|
+
self[identifier.uuid] = identifier
|
|
114
|
+
|
|
115
|
+
def valid_new_identifier_types(self, document_type: type["Document"]) -> list[type[Identifier]]:
|
|
116
|
+
"""Return a list of identifier types which can be added to a document of the given type, given identifiers already in this collection."""
|
|
117
|
+
return [
|
|
118
|
+
t
|
|
119
|
+
for t in SUPPORTED_IDENTIFIER_TYPES
|
|
120
|
+
if t.schema.allow_editing
|
|
121
|
+
and (not t.schema.document_types or document_type.__name__ in t.schema.document_types)
|
|
122
|
+
]
|
|
123
|
+
|
|
124
|
+
def __delitem__(self, key: Union[Identifier, str]) -> None:
|
|
125
|
+
if isinstance(key, Identifier):
|
|
126
|
+
super().__delitem__(key.uuid)
|
|
127
|
+
else:
|
|
128
|
+
super().__delitem__(key)
|
|
129
|
+
|
|
130
|
+
def of_type(self, identifier_type: type[Identifier]) -> list[Identifier]:
|
|
131
|
+
"""Return a list of all identifiers of a given type."""
|
|
132
|
+
uuids = self.keys()
|
|
133
|
+
return [self[uuid] for uuid in list(uuids) if isinstance(self[uuid], identifier_type)]
|
|
134
|
+
|
|
135
|
+
def delete_type(self, deleted_identifier_type: type[Identifier]) -> None:
|
|
136
|
+
"For when we want an identifier to be the only valid identifier of that type, delete the others first"
|
|
137
|
+
uuids = self.keys()
|
|
138
|
+
for uuid in list(uuids):
|
|
139
|
+
# we could compare against .schema instead, which would have different behaviour for subclasses
|
|
140
|
+
if isinstance(self[uuid], deleted_identifier_type):
|
|
141
|
+
del self[uuid]
|
|
142
|
+
|
|
143
|
+
@property
|
|
144
|
+
def as_etree(self) -> etree._Element:
|
|
145
|
+
"""Return an etree representation of all the Document's identifiers."""
|
|
146
|
+
identifiers_root = etree.Element("identifiers")
|
|
147
|
+
|
|
148
|
+
for identifier in self.values():
|
|
149
|
+
identifiers_root.append(identifier.as_xml_tree)
|
|
150
|
+
|
|
151
|
+
return identifiers_root
|
|
152
|
+
|
|
153
|
+
def by_score(self, type: Optional[type[Identifier]] = None) -> list[Identifier]:
|
|
154
|
+
"""
|
|
155
|
+
:param type: Optionally, an identifier type to constrain this list to.
|
|
156
|
+
|
|
157
|
+
:return: Return a list of identifiers, sorted by their score in descending order.
|
|
158
|
+
"""
|
|
159
|
+
identifiers = self.of_type(type) if type else list(self.values())
|
|
160
|
+
return sorted(identifiers, key=lambda v: v.score, reverse=True)
|
|
161
|
+
|
|
162
|
+
def preferred(self, type: Optional[type[Identifier]] = None) -> Optional[Identifier]:
|
|
163
|
+
"""
|
|
164
|
+
:param type: Optionally, an identifier type to constrain the results to.
|
|
165
|
+
|
|
166
|
+
:return: Return the highest scoring identifier of the given type (or of any type, if none is specified). Returns `None` if no identifier is available.
|
|
167
|
+
"""
|
|
168
|
+
if len(self.by_score(type)) == 0:
|
|
169
|
+
return None
|
|
170
|
+
return self.by_score(type)[0]
|
|
@@ -32,8 +32,11 @@ class FindCaseLawIdentifierSchema(IdentifierSchema):
|
|
|
32
32
|
human_readable = False
|
|
33
33
|
base_score_multiplier = 0.6
|
|
34
34
|
|
|
35
|
+
allow_editing = False
|
|
36
|
+
require_globally_unique = True
|
|
37
|
+
|
|
35
38
|
@classmethod
|
|
36
|
-
def
|
|
39
|
+
def validate_identifier_value(cls, value: str) -> bool:
|
|
37
40
|
return bool(VALID_FCLID_PATTERN.match(value))
|
|
38
41
|
|
|
39
42
|
@classmethod
|
|
@@ -48,8 +48,10 @@ class NeutralCitationNumberSchema(IdentifierSchema):
|
|
|
48
48
|
human_readable = True
|
|
49
49
|
base_score_multiplier = 1.5
|
|
50
50
|
|
|
51
|
+
document_types = ["Judgment"]
|
|
52
|
+
|
|
51
53
|
@classmethod
|
|
52
|
-
def
|
|
54
|
+
def validate_identifier_value(cls, value: str) -> bool:
|
|
53
55
|
# Quick check to see if the NCN matches the expected pattern
|
|
54
56
|
if not bool(VALID_NCN_PATTERN.match(value)):
|
|
55
57
|
raise NCNDoesNotMatchExpectedPatternException(f"NCN '{value}' is not in the expected format")
|
|
@@ -13,6 +13,8 @@ class PressSummaryRelatedNCNIdentifierSchema(NeutralCitationNumberSchema):
|
|
|
13
13
|
human_readable = True
|
|
14
14
|
base_score_multiplier = 0.8
|
|
15
15
|
|
|
16
|
+
document_types = ["PressSummary"]
|
|
17
|
+
|
|
16
18
|
@classmethod
|
|
17
19
|
def compile_identifier_url_slug(cls, value: str) -> DocumentIdentifierSlug:
|
|
18
20
|
return DocumentIdentifierSlug(super().compile_identifier_url_slug(value) + "/press-summary")
|
|
@@ -3,22 +3,18 @@ from warnings import warn
|
|
|
3
3
|
|
|
4
4
|
from lxml import etree
|
|
5
5
|
|
|
6
|
-
from . import IDENTIFIER_UNPACKABLE_ATTRIBUTES, Identifier
|
|
6
|
+
from . import IDENTIFIER_UNPACKABLE_ATTRIBUTES, Identifier
|
|
7
|
+
from .collection import SUPPORTED_IDENTIFIER_TYPES, IdentifiersCollection
|
|
7
8
|
from .exceptions import InvalidIdentifierXMLRepresentationException
|
|
8
|
-
from .fclid import FindCaseLawIdentifier
|
|
9
|
-
from .neutral_citation import NeutralCitationNumber
|
|
10
|
-
from .press_summary_ncn import PressSummaryRelatedNCNIdentifier
|
|
11
9
|
|
|
12
10
|
IDENTIFIER_NAMESPACE_MAP: dict[str, type[Identifier]] = {
|
|
13
|
-
|
|
14
|
-
"ukncn": NeutralCitationNumber,
|
|
15
|
-
"uksummaryofncn": PressSummaryRelatedNCNIdentifier,
|
|
11
|
+
identifier_type.schema.namespace: identifier_type for identifier_type in SUPPORTED_IDENTIFIER_TYPES
|
|
16
12
|
}
|
|
17
13
|
|
|
18
14
|
|
|
19
|
-
def unpack_all_identifiers_from_etree(identifiers_etree: Optional[etree._Element]) ->
|
|
15
|
+
def unpack_all_identifiers_from_etree(identifiers_etree: Optional[etree._Element]) -> IdentifiersCollection:
|
|
20
16
|
"""This expects the entire <identifiers> tag, and unpacks all Identifiers inside it"""
|
|
21
|
-
identifiers =
|
|
17
|
+
identifiers = IdentifiersCollection()
|
|
22
18
|
if identifiers_etree is None:
|
|
23
19
|
return identifiers
|
|
24
20
|
for identifier_etree in identifiers_etree.findall("identifier"):
|
|
@@ -12,7 +12,7 @@ from ds_caselaw_utils.types import CourtCode, JurisdictionCode
|
|
|
12
12
|
from lxml import etree
|
|
13
13
|
|
|
14
14
|
from caselawclient.Client import MarklogicApiClient
|
|
15
|
-
from caselawclient.models.identifiers import
|
|
15
|
+
from caselawclient.models.identifiers.collection import IdentifiersCollection
|
|
16
16
|
from caselawclient.models.identifiers.unpacker import unpack_all_identifiers_from_etree
|
|
17
17
|
from caselawclient.types import DocumentURIString
|
|
18
18
|
from caselawclient.xml_helpers import get_xpath_match_string
|
|
@@ -180,7 +180,7 @@ class SearchResult:
|
|
|
180
180
|
)
|
|
181
181
|
|
|
182
182
|
@property
|
|
183
|
-
def identifiers(self) ->
|
|
183
|
+
def identifiers(self) -> IdentifiersCollection:
|
|
184
184
|
identifiers_etrees = self._get_xpath(".//identifiers")
|
|
185
185
|
count = len(identifiers_etrees)
|
|
186
186
|
if count != 1:
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
from typing import NamedTuple
|
|
2
|
+
|
|
3
|
+
|
|
1
4
|
class InvalidDocumentURIException(Exception):
|
|
2
5
|
"""The document URI is not valid."""
|
|
3
6
|
|
|
@@ -58,3 +61,11 @@ class DocumentIdentifierSlug(str):
|
|
|
58
61
|
|
|
59
62
|
class DocumentIdentifierValue(str):
|
|
60
63
|
pass
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
SuccessFailureMessageTuple = NamedTuple("SuccessFailureMessageTuple", [("success", bool), ("messages", list[str])])
|
|
67
|
+
"""
|
|
68
|
+
A tuple used to return if an operation has succeeded or failed (and optionally a list of messages associated with that operation).
|
|
69
|
+
|
|
70
|
+
This should only be used where a failure is considered a routine part of the application (eg during validation options); where an unexpected action has led to a failure the application should raise an appropriate exception.
|
|
71
|
+
"""
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
<?xml version="1.0"?>
|
|
2
|
+
<xsl:stylesheet version="1.0"
|
|
3
|
+
xmlns='http://docs.oasis-open.org/legaldocml/ns/akn/3.0'
|
|
4
|
+
xmlns:akn='http://docs.oasis-open.org/legaldocml/ns/akn/3.0'
|
|
5
|
+
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
|
6
|
+
xmlns:uk='https://caselaw.nationalarchives.gov.uk/akn'>
|
|
7
|
+
<xsl:param name="work_uri" />
|
|
8
|
+
<xsl:param name="expression_uri" />
|
|
9
|
+
<xsl:param name="manifestation_uri" />
|
|
10
|
+
<xsl:output method="xml" indent="yes" />
|
|
11
|
+
|
|
12
|
+
<!-- Identity transformation -->
|
|
13
|
+
<xsl:template match="@* | node()">
|
|
14
|
+
<xsl:copy>
|
|
15
|
+
<xsl:apply-templates select="@* | node()" />
|
|
16
|
+
</xsl:copy>
|
|
17
|
+
</xsl:template>
|
|
18
|
+
|
|
19
|
+
<!-- <xsl:template match="akn:identification/FRBRWork/FRBRthistext/text()"><xsl:copy-of select="$cat" /></xsl:template> -->
|
|
20
|
+
|
|
21
|
+
<xsl:template match="akn:identification/akn:FRBRWork/akn:FRBRthis">
|
|
22
|
+
<FRBRthis>
|
|
23
|
+
<xsl:attribute name="value">
|
|
24
|
+
<xsl:value-of select="$work_uri" />
|
|
25
|
+
</xsl:attribute>
|
|
26
|
+
</FRBRthis>
|
|
27
|
+
</xsl:template>
|
|
28
|
+
|
|
29
|
+
<xsl:template match="akn:identification/akn:FRBRWork/akn:FRBRuri">
|
|
30
|
+
<FRBRuri>
|
|
31
|
+
<xsl:attribute name="value">
|
|
32
|
+
<xsl:value-of select="$work_uri" />
|
|
33
|
+
</xsl:attribute>
|
|
34
|
+
</FRBRuri>
|
|
35
|
+
</xsl:template>
|
|
36
|
+
|
|
37
|
+
<xsl:template match="akn:identification/akn:FRBRExpression/akn:FRBRthis">
|
|
38
|
+
<FRBRthis>
|
|
39
|
+
<xsl:attribute name="value">
|
|
40
|
+
<xsl:value-of select="$expression_uri" />
|
|
41
|
+
</xsl:attribute>
|
|
42
|
+
</FRBRthis>
|
|
43
|
+
</xsl:template>
|
|
44
|
+
|
|
45
|
+
<xsl:template match="akn:identification/akn:FRBRExpression/akn:FRBRuri">
|
|
46
|
+
<FRBRuri>
|
|
47
|
+
<xsl:attribute name="value">
|
|
48
|
+
<xsl:value-of select="$expression_uri" />
|
|
49
|
+
</xsl:attribute>
|
|
50
|
+
</FRBRuri>
|
|
51
|
+
</xsl:template>
|
|
52
|
+
|
|
53
|
+
<xsl:template match="akn:identification/akn:FRBRManifestation/akn:FRBRthis">
|
|
54
|
+
<FRBRthis>
|
|
55
|
+
<xsl:attribute name="value">
|
|
56
|
+
<xsl:value-of select="$manifestation_uri" />
|
|
57
|
+
</xsl:attribute>
|
|
58
|
+
</FRBRthis>
|
|
59
|
+
</xsl:template>
|
|
60
|
+
|
|
61
|
+
<xsl:template match="akn:identification/akn:FRBRManifestation/akn:FRBRuri">
|
|
62
|
+
<FRBRuri>
|
|
63
|
+
<xsl:attribute name="value">
|
|
64
|
+
<xsl:value-of select="$manifestation_uri" />
|
|
65
|
+
</xsl:attribute>
|
|
66
|
+
</FRBRuri>
|
|
67
|
+
</xsl:template>
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
</xsl:stylesheet>
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
<?xml version="1.0"?>
|
|
2
|
+
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
|
3
|
+
xmlns:uk='https://caselaw.nationalarchives.gov.uk/akn'
|
|
4
|
+
xmlns:akn='http://docs.oasis-open.org/legaldocml/ns/akn/3.0'>
|
|
5
|
+
|
|
6
|
+
<xsl:param name="dog" />
|
|
7
|
+
<xsl:param name="cat" />
|
|
8
|
+
<xsl:output method="xml" indent="yes" />
|
|
9
|
+
|
|
10
|
+
<xsl:template match="@* | node()">
|
|
11
|
+
<xsl:copy>
|
|
12
|
+
<xsl:apply-templates select="@* | node()" />
|
|
13
|
+
</xsl:copy>
|
|
14
|
+
</xsl:template>
|
|
15
|
+
|
|
16
|
+
<xsl:template match="akn:text/text()"><xsl:copy-of select="$cat" /></xsl:template>
|
|
17
|
+
|
|
18
|
+
<xsl:template match="akn:attribute">
|
|
19
|
+
<akn:attribute>
|
|
20
|
+
<xsl:attribute name="attribute">
|
|
21
|
+
<xsl:value-of select="$dog" />
|
|
22
|
+
</xsl:attribute>
|
|
23
|
+
</akn:attribute>
|
|
24
|
+
</xsl:template>
|
|
25
|
+
|
|
26
|
+
</xsl:stylesheet>
|
{ds_caselaw_marklogic_api_client-37.4.0 → ds_caselaw_marklogic_api_client-39.0.0}/LICENSE.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|