ds-caselaw-marklogic-api-client 37.3.1__tar.gz → 38.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ds-caselaw-marklogic-api-client might be problematic. Click here for more details.

Files changed (93) hide show
  1. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/PKG-INFO +6 -4
  2. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/pyproject.toml +6 -6
  3. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/factories.py +16 -11
  4. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/documents/__init__.py +45 -10
  5. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/documents/body.py +11 -0
  6. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/documents/exceptions.py +4 -0
  7. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/documents/xml.py +25 -0
  8. ds_caselaw_marklogic_api_client-38.0.0/src/caselawclient/models/identifiers/__init__.py +208 -0
  9. ds_caselaw_marklogic_api_client-38.0.0/src/caselawclient/models/identifiers/collection.py +102 -0
  10. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/identifiers/fclid.py +4 -1
  11. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/identifiers/neutral_citation.py +3 -1
  12. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/identifiers/press_summary_ncn.py +2 -0
  13. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/identifiers/unpacker.py +21 -16
  14. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/responses/search_result.py +2 -2
  15. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/types.py +11 -0
  16. ds_caselaw_marklogic_api_client-38.0.0/src/caselawclient/xslt/modify_xml_live.xsl +67 -0
  17. ds_caselaw_marklogic_api_client-38.0.0/src/caselawclient/xslt/sample.xsl +26 -0
  18. ds_caselaw_marklogic_api_client-37.3.1/src/caselawclient/models/identifiers/__init__.py +0 -186
  19. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/LICENSE.md +0 -0
  20. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/README.md +0 -0
  21. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/Client.py +0 -0
  22. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/__init__.py +0 -0
  23. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/client_helpers/__init__.py +0 -0
  24. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/client_helpers/search_helpers.py +0 -0
  25. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/content_hash.py +0 -0
  26. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/errors.py +0 -0
  27. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/identifier_resolution.py +0 -0
  28. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/__init__.py +0 -0
  29. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/documents/statuses.py +0 -0
  30. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/documents/transforms/html.xsl +0 -0
  31. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/identifiers/exceptions.py +0 -0
  32. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/judgments.py +0 -0
  33. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/neutral_citation_mixin.py +0 -0
  34. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/parser_logs.py +0 -0
  35. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/press_summaries.py +0 -0
  36. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/utilities/__init__.py +0 -0
  37. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/utilities/aws.py +0 -0
  38. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/utilities/dates.py +0 -0
  39. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/models/utilities/move.py +0 -0
  40. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/py.typed +0 -0
  41. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/responses/__init__.py +0 -0
  42. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/responses/search_response.py +0 -0
  43. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/responses/xsl/search_match.xsl +0 -0
  44. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/search_parameters.py +0 -0
  45. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xml_helpers.py +0 -0
  46. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/break_judgment_checkout.xqy +0 -0
  47. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/checkin_judgment.xqy +0 -0
  48. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/checkout_judgment.xqy +0 -0
  49. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/copy_document.xqy +0 -0
  50. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/delete_judgment.xqy +0 -0
  51. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/document_collections.xqy +0 -0
  52. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/document_exists.xqy +0 -0
  53. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_combined_stats_table.xqy +0 -0
  54. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_components_for_document.xqy +0 -0
  55. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_highest_enrichment_version.xqy +0 -0
  56. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_highest_parser_version.xqy +0 -0
  57. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_judgment.xqy +0 -0
  58. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_judgment_checkout_status.xqy +0 -0
  59. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_judgment_version.xqy +0 -0
  60. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_last_modified.xqy +0 -0
  61. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_missing_fclid.xqy +0 -0
  62. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_next_document_sequence_number.xqy +0 -0
  63. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy +0 -0
  64. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_pending_parse_for_version.xqy +0 -0
  65. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_properties_for_search_results.xqy +0 -0
  66. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_property.xqy +0 -0
  67. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_property_as_node.xqy +0 -0
  68. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_recently_enriched.xqy +0 -0
  69. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_recently_parsed.xqy +0 -0
  70. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_version_annotation.xqy +0 -0
  71. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/get_version_created.xqy +0 -0
  72. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/insert_document.xqy +0 -0
  73. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/list_judgment_versions.xqy +0 -0
  74. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/resolve_from_identifier_slug.xqy +0 -0
  75. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/resolve_from_identifier_value.xqy +0 -0
  76. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/set_boolean_property.xqy +0 -0
  77. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/set_metadata_citation.xqy +0 -0
  78. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/set_metadata_court.xqy +0 -0
  79. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/set_metadata_jurisdiction.xqy +0 -0
  80. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/set_metadata_name.xqy +0 -0
  81. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/set_metadata_this_uri.xqy +0 -0
  82. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/set_metadata_work_expression_date.xqy +0 -0
  83. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/set_property.xqy +0 -0
  84. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/set_property_as_node.xqy +0 -0
  85. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/update_document.xqy +0 -0
  86. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/update_locked_judgment.xqy +0 -0
  87. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/user_has_privilege.xqy +0 -0
  88. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/user_has_role.xqy +0 -0
  89. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/validate_all_documents.xqy +0 -0
  90. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/validate_document.xqy +0 -0
  91. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/xslt.xqy +0 -0
  92. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery/xslt_transform.xqy +0 -0
  93. {ds_caselaw_marklogic_api_client-37.3.1 → ds_caselaw_marklogic_api_client-38.0.0}/src/caselawclient/xquery_type_dicts.py +0 -0
@@ -1,16 +1,17 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: ds-caselaw-marklogic-api-client
3
- Version: 37.3.1
3
+ Version: 38.0.0
4
4
  Summary: An API client for interacting with the underlying data in Find Caselaw.
5
- Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
6
5
  Keywords: national archives,caselaw
7
6
  Author: The National Archives
8
7
  Requires-Python: >=3.10.0,<4.0.0
9
8
  Classifier: Programming Language :: Python :: 3
10
9
  Classifier: Programming Language :: Python :: 3.10
11
10
  Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
12
13
  Requires-Dist: boto3 (>=1.26.112,<2.0.0)
13
- Requires-Dist: certifi (>=2025.4.26,<2025.5.0)
14
+ Requires-Dist: certifi (>=2025.6.15,<2025.7.0)
14
15
  Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
15
16
  Requires-Dist: django-environ (>=0.12.0)
16
17
  Requires-Dist: ds-caselaw-utils (>=2.0.0,<3.0.0)
@@ -26,6 +27,7 @@ Requires-Dist: requests-toolbelt (>=0.10.1,<1.1.0)
26
27
  Requires-Dist: saxonche (>=12.5.0,<13.0.0)
27
28
  Requires-Dist: sqids (>=0.5.0,<0.6.0)
28
29
  Requires-Dist: typing-extensions (>=4.7.1,<5.0.0)
30
+ Project-URL: Homepage, https://github.com/nationalarchives/ds-caselaw-custom-api-client
29
31
  Description-Content-Type: text/markdown
30
32
 
31
33
  # The National Archives: Find Case Law
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ds-caselaw-marklogic-api-client"
3
- version = "37.3.1"
3
+ version = "38.0.0"
4
4
  description = "An API client for interacting with the underlying data in Find Caselaw."
5
5
  authors = ["The National Archives"]
6
6
  homepage = "https://github.com/nationalarchives/ds-caselaw-custom-api-client"
@@ -12,7 +12,7 @@ packages = [
12
12
 
13
13
  [tool.poetry.dependencies]
14
14
  python = "^3.10.0"
15
- certifi = ">=2025.4.26,<2025.5.0"
15
+ certifi = ">=2025.6.15,<2025.7.0"
16
16
  charset-normalizer = "^3.0.0"
17
17
  django-environ = ">=0.12.0"
18
18
  idna = "^3.4"
@@ -31,13 +31,13 @@ saxonche = "^12.5.0"
31
31
  sqids = "^0.5.0"
32
32
 
33
33
  [tool.poetry.group.dev.dependencies]
34
- coverage = "7.8.2"
35
- pytest = "8.3.5"
34
+ coverage = "7.9.2"
35
+ pytest = "8.4.1"
36
36
  beautifulsoup4 = "4.13.4"
37
37
  responses = "0.25.7"
38
- python-dotenv = "1.1.0"
38
+ python-dotenv = "1.1.1"
39
39
  time-machine = "2.16.0"
40
- moto = {version = "5.1.5", extras = ["all"]}
40
+ moto = {version = "5.1.6", extras = ["all"]}
41
41
 
42
42
  [tool.poetry.group.docs]
43
43
  optional = true
@@ -1,15 +1,14 @@
1
1
  import datetime
2
2
  import json
3
- from typing import Any, Optional
3
+ from typing import Any, Generic, Optional, Type, TypeAlias, TypeVar, cast
4
4
  from unittest.mock import Mock
5
5
 
6
- from typing_extensions import TypeAlias
7
-
8
6
  from caselawclient.Client import MarklogicApiClient
9
7
  from caselawclient.identifier_resolution import IdentifierResolution, IdentifierResolutions
10
8
  from caselawclient.models.documents import Document
11
9
  from caselawclient.models.documents.body import DocumentBody
12
10
  from caselawclient.models.identifiers import Identifier
11
+ from caselawclient.models.identifiers.collection import IdentifiersCollection
13
12
  from caselawclient.models.identifiers.fclid import FindCaseLawIdentifier
14
13
  from caselawclient.models.identifiers.neutral_citation import NeutralCitationNumber
15
14
  from caselawclient.models.judgments import Judgment
@@ -17,6 +16,8 @@ from caselawclient.models.press_summaries import PressSummary
17
16
  from caselawclient.responses.search_result import SearchResult, SearchResultMetadata
18
17
  from caselawclient.types import DocumentURIString
19
18
 
19
+ T = TypeVar("T")
20
+
20
21
  DEFAULT_DOCUMENT_BODY_XML = """<akomaNtoso xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0" xmlns:uk="https://caselaw.nationalarchives.gov.uk/akn">
21
22
  <judgment name="decision">
22
23
  <meta/><header/>
@@ -114,13 +115,13 @@ class PressSummaryFactory(DocumentFactory):
114
115
  }
115
116
 
116
117
 
117
- class SimpleFactory:
118
- target_class: TypeAlias = object
118
+ class SimpleFactory(Generic[T]):
119
+ target_class: Type[T]
119
120
  # "name_of_attribute": "default value"
120
121
  PARAMS_MAP: dict[str, Any]
121
122
 
122
123
  @classmethod
123
- def build(cls, **kwargs: Any) -> target_class:
124
+ def build(cls, **kwargs: Any) -> T:
124
125
  mock_object = Mock(spec=cls.target_class, autospec=True)
125
126
 
126
127
  for param, default in cls.PARAMS_MAP.items():
@@ -129,10 +130,10 @@ class SimpleFactory:
129
130
  else:
130
131
  setattr(mock_object.return_value, param, default)
131
132
 
132
- return mock_object()
133
+ return cast(T, mock_object())
133
134
 
134
135
 
135
- class SearchResultMetadataFactory(SimpleFactory):
136
+ class SearchResultMetadataFactory(SimpleFactory[SearchResultMetadata]):
136
137
  target_class = SearchResultMetadata
137
138
  # "name_of_attribute": "default value"
138
139
  PARAMS_MAP = {
@@ -174,9 +175,8 @@ class IdentifierResolutionsFactory:
174
175
  return IdentifierResolutions(resolutions)
175
176
 
176
177
 
177
- class SearchResultFactory(SimpleFactory):
178
+ class SearchResultFactory(SimpleFactory[SearchResult]):
178
179
  target_class = SearchResult
179
-
180
180
  PARAMS_MAP = {
181
181
  "uri": "d-a1b2c3",
182
182
  "name": "Judgment v Judgement",
@@ -189,5 +189,10 @@ class SearchResultFactory(SimpleFactory):
189
189
  "matches": None,
190
190
  "slug": "uksc/2025/1",
191
191
  "content_hash": "ed7002b439e9ac845f22357d822bac1444730fbdb6016d3ec9432297b9ec9f73",
192
- "identifiers": {"id-1": NeutralCitationNumber("[2025] UKSC 123", "id-1")},
192
+ "identifiers": IdentifiersCollection(
193
+ {
194
+ "id-1": NeutralCitationNumber("[2025] UKSC 123", "id-1"),
195
+ "id-2": FindCaseLawIdentifier("bcdfghjk", "id-2"),
196
+ }
197
+ ),
193
198
  }
@@ -16,6 +16,7 @@ from caselawclient.errors import (
16
16
  )
17
17
  from caselawclient.identifier_resolution import IdentifierResolutions
18
18
  from caselawclient.models.identifiers import Identifier
19
+ from caselawclient.models.identifiers.exceptions import IdentifierValidationException
19
20
  from caselawclient.models.identifiers.fclid import FindCaseLawIdentifier, FindCaseLawIdentifierSchema
20
21
  from caselawclient.models.identifiers.unpacker import unpack_all_identifiers_from_etree
21
22
  from caselawclient.models.utilities import VersionsDict, extract_version, render_versions
@@ -33,7 +34,7 @@ from caselawclient.models.utilities.aws import (
33
34
  from caselawclient.types import DocumentURIString
34
35
 
35
36
  from .body import DocumentBody
36
- from .exceptions import CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion
37
+ from .exceptions import CannotEnrichUnenrichableDocument, CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion
37
38
  from .statuses import DOCUMENT_STATUS_HOLD, DOCUMENT_STATUS_IN_PROGRESS, DOCUMENT_STATUS_NEW, DOCUMENT_STATUS_PUBLISHED
38
39
 
39
40
  MINIMUM_ENRICHMENT_TIME = datetime.timedelta(minutes=20)
@@ -340,22 +341,34 @@ class Document:
340
341
  now.isoformat(),
341
342
  )
342
343
 
344
+ if not self.can_enrich:
345
+ msg = f"{self.uri} cannot be enriched"
346
+ raise CannotEnrichUnenrichableDocument(msg)
347
+
343
348
  announce_document_event(
344
349
  uri=self.uri,
345
350
  status="enrich",
346
351
  enrich=True,
347
352
  )
348
353
 
349
- def enrich(self) -> bool:
354
+ def enrich(self, even_if_recent: bool = False, accept_failures: bool = False) -> bool:
350
355
  """
351
356
  Request enrichment of a document, if it's sensible to do so.
352
357
  """
353
- if self.enriched_recently is False:
354
- print("Enrichment requested")
358
+ if not (even_if_recent) and self.enriched_recently:
359
+ print("Enrichment not requested as document was enriched recently")
360
+ return False
361
+
362
+ print("Enrichment requested")
363
+
364
+ try:
355
365
  self.force_enrich()
356
- return True
357
- print("Enrichment not requested as document was enriched recently")
358
- return False
366
+ except CannotEnrichUnenrichableDocument as e:
367
+ if not accept_failures:
368
+ raise e
369
+ return False
370
+
371
+ return True
359
372
 
360
373
  @cached_property
361
374
  def enriched_recently(self) -> bool:
@@ -501,10 +514,22 @@ class Document:
501
514
  """
502
515
  return self.docx_exists()
503
516
 
517
+ @cached_property
518
+ def can_enrich(self) -> bool:
519
+ """
520
+ Is it possible to enrich this document?
521
+ """
522
+ return self.body.has_content
523
+
504
524
  def save_identifiers(self) -> None:
505
- """Save the current state of this Document's identifiers to MarkLogic."""
506
- self.identifiers.validate()
507
- self.api_client.set_property_as_node(self.uri, "identifiers", self.identifiers.as_etree)
525
+ """Validate the identifiers, and if the validation passes save them to MarkLogic"""
526
+ validations = self.identifiers.perform_all_validations(document_type=type(self), api_client=self.api_client)
527
+ if validations.success is True:
528
+ self.api_client.set_property_as_node(self.uri, "identifiers", self.identifiers.as_etree)
529
+ else:
530
+ raise IdentifierValidationException(
531
+ "Unable to save identifiers; validation constraints not met: " + ", ".join(validations.messages)
532
+ )
508
533
 
509
534
  def __getattr__(self, name: str) -> Any:
510
535
  warnings.warn(f"{name} no longer exists on Document, using Document.body instead", DeprecationWarning)
@@ -541,3 +566,13 @@ class Document:
541
566
  def content_as_html(self) -> str | None:
542
567
  xlst_image_location = os.getenv("XSLT_IMAGE_LOCATION", "")
543
568
  return self.body.content_html(f"{xlst_image_location}/{self.uri}")
569
+
570
+ def xml_with_correct_frbr(self) -> bytes:
571
+ """Dynamically modify FRBR uris to reflect current storage location and FCL id"""
572
+ fcl_identifiers = self.identifiers.of_type(FindCaseLawIdentifier)
573
+ work_uri = f"https://caselaw.nationalarchives.gov.uk/id/{fcl_identifiers[0].url_slug}"
574
+ expression_uri = f"https://caselaw.nationalarchives.gov.uk/{self.uri.lstrip('/')}"
575
+ manifestation_uri = f"https://caselaw.nationalarchives.gov.uk/{self.uri.lstrip('/')}/data.xml"
576
+ return self.body.apply_xslt(
577
+ "modify_xml_live.xsl", work_uri=work_uri, expression_uri=expression_uri, manifestation_uri=manifestation_uri
578
+ )
@@ -51,6 +51,14 @@ class DocumentBody:
51
51
  def jurisdiction(self) -> str:
52
52
  return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()")
53
53
 
54
+ @cached_property
55
+ def category(self) -> Optional[str]:
56
+ return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category/text()")
57
+
58
+ @cached_property
59
+ def case_number(self) -> Optional[str]:
60
+ return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:caseNumber/text()")
61
+
54
62
  @property
55
63
  def court_and_jurisdiction_identifier_string(self) -> CourtCode:
56
64
  if self.jurisdiction != "":
@@ -166,3 +174,6 @@ class DocumentBody:
166
174
  :return: `True` if there was a complete parser failure, otherwise `False`
167
175
  """
168
176
  return "error" in self._xml.root_element
177
+
178
+ def apply_xslt(self, xslt_filename: str, **values: str) -> bytes:
179
+ return self._xml.apply_xslt(xslt_filename, **values)
@@ -2,5 +2,9 @@ class CannotPublishUnpublishableDocument(Exception):
2
2
  """A document which has failed publication safety checks in `Document.is_publishable` cannot be published."""
3
3
 
4
4
 
5
+ class CannotEnrichUnenrichableDocument(Exception):
6
+ """A document which cannot be enriched (see `Document.can_enrich`) tried to be sent to enrichment"""
7
+
8
+
5
9
  class DocumentNotSafeForDeletion(Exception):
6
10
  """A document which is not safe for deletion cannot be deleted."""
@@ -1,8 +1,16 @@
1
+ import os
2
+
1
3
  from lxml import etree
2
4
 
3
5
  from caselawclient.xml_helpers import get_xpath_match_string, get_xpath_match_strings
4
6
 
5
7
 
8
+ def _xslt_path(xslt_file_name: str) -> str:
9
+ from caselawclient.Client import ROOT_DIR
10
+
11
+ return os.path.join(ROOT_DIR, "xslt", xslt_file_name)
12
+
13
+
6
14
  class NonXMLDocumentError(Exception):
7
15
  """A document cannot be parsed as XML."""
8
16
 
@@ -41,3 +49,20 @@ class XML:
41
49
  namespaces: dict[str, str],
42
50
  ) -> list[str]:
43
51
  return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
52
+
53
+ def _modified(
54
+ self,
55
+ xslt: str,
56
+ **values: str,
57
+ ) -> bytes:
58
+ """XSLT transform this XML, given a stylesheet"""
59
+ passable_values = {k: etree.XSLT.strparam(v) for k, v in values.items()}
60
+ xslt_transform = etree.XSLT(etree.fromstring(xslt))
61
+ return etree.tostring(xslt_transform(self.xml_as_tree, profile_run=False, **passable_values))
62
+
63
+ def apply_xslt(self, xslt_filename: str, **values: str) -> bytes:
64
+ """XSLT transform this XML, given a path to a stylesheet"""
65
+ full_xslt_filename = _xslt_path(xslt_filename)
66
+ with open(full_xslt_filename) as f:
67
+ xslt = f.read()
68
+ return self._modified(xslt, **values)
@@ -0,0 +1,208 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import TYPE_CHECKING, Any, Optional
3
+ from uuid import uuid4
4
+
5
+ from lxml import etree
6
+
7
+ from caselawclient.types import DocumentIdentifierSlug, DocumentIdentifierValue, SuccessFailureMessageTuple
8
+
9
+ from .exceptions import IdentifierValidationException
10
+
11
+ if TYPE_CHECKING:
12
+ from caselawclient.Client import MarklogicApiClient
13
+ from caselawclient.models.documents import Document
14
+
15
+ IDENTIFIER_PACKABLE_ATTRIBUTES: list[str] = [
16
+ "uuid",
17
+ "value",
18
+ "deprecated",
19
+ "url_slug",
20
+ ]
21
+ """A list of attributes of an Identifier to pack into an XML representation."""
22
+
23
+ IDENTIFIER_UNPACKABLE_ATTRIBUTES: list[str] = [
24
+ "uuid",
25
+ "value",
26
+ "deprecated",
27
+ ]
28
+ """A list of attributes to unpack from an XML representation."""
29
+
30
+
31
+ class IdentifierSchema(ABC):
32
+ """
33
+ A base class which describes what an identifier schema should look like.
34
+ """
35
+
36
+ name: str
37
+ namespace: str
38
+
39
+ human_readable: bool
40
+ """ Should this identifier type be considered for display as a 'human readable' identifier? """
41
+
42
+ base_score_multiplier: float = 1.0
43
+ """ A multiplier used to adjust the relative ranking of this identifier when calculating preferred identifiers. """
44
+
45
+ allow_editing: bool = True
46
+ """ Should editors be allowed to manually manipulate identifiers under this schema? """
47
+
48
+ require_globally_unique: bool = True
49
+ """ Must this identifier be globally unique? """
50
+
51
+ document_types: Optional[list[str]] = None
52
+ """
53
+ If present, a list of the names of document classes which can have this identifier.
54
+
55
+ If `None`, this identifier is valid for all document types.
56
+ """
57
+
58
+ def __init_subclass__(cls: type["IdentifierSchema"], **kwargs: Any) -> None:
59
+ """Ensure that subclasses have the required attributes set."""
60
+ for required in (
61
+ "name",
62
+ "namespace",
63
+ "human_readable",
64
+ ):
65
+ if not hasattr(cls, required):
66
+ raise NotImplementedError(f"Can't instantiate IdentifierSchema without {required} attribute.")
67
+ super().__init_subclass__(**kwargs)
68
+
69
+ def __repr__(self) -> str:
70
+ return self.name
71
+
72
+ @classmethod
73
+ @abstractmethod
74
+ def validate_identifier_value(cls, value: str) -> bool:
75
+ """Check that any given identifier value is valid for this schema."""
76
+ pass
77
+
78
+ @classmethod
79
+ @abstractmethod
80
+ def compile_identifier_url_slug(cls, value: str) -> DocumentIdentifierSlug:
81
+ """Convert an identifier into a precompiled URL slug."""
82
+ pass
83
+
84
+
85
+ class Identifier(ABC):
86
+ """A base class for subclasses representing a concrete identifier."""
87
+
88
+ schema: type[IdentifierSchema]
89
+
90
+ uuid: str
91
+ value: DocumentIdentifierValue
92
+
93
+ deprecated: bool
94
+ """Should this identifier be considered deprecated, ie although we know it refers to a particular document its usage should be discouraged?"""
95
+
96
+ def __init_subclass__(cls: type["Identifier"], **kwargs: Any) -> None:
97
+ """Ensure that subclasses have the required attributes set."""
98
+ for required in ("schema",):
99
+ if not getattr(cls, required, False):
100
+ raise NotImplementedError(f"Can't instantiate Identifier without {required} attribute.")
101
+ super().__init_subclass__(**kwargs)
102
+
103
+ def __repr__(self) -> str:
104
+ representation = f"{self.schema.name} {self.value}: {self.uuid}"
105
+
106
+ if self.deprecated:
107
+ return f"<{representation} (deprecated)> "
108
+ return f"<{representation}>"
109
+
110
+ def __str__(self) -> str:
111
+ return self.value
112
+
113
+ def __init__(self, value: str, uuid: Optional[str] = None, deprecated: bool = False) -> None:
114
+ if not self.schema.validate_identifier_value(value=value):
115
+ raise IdentifierValidationException(
116
+ f'Identifier value "{value}" is not valid according to the {self.schema.name} schema.'
117
+ )
118
+
119
+ self.value = DocumentIdentifierValue(value)
120
+ if uuid:
121
+ self.uuid = uuid
122
+ else:
123
+ self.uuid = "id-" + str(uuid4())
124
+
125
+ self.deprecated = deprecated
126
+
127
+ @property
128
+ def as_xml_tree(self) -> etree._Element:
129
+ """Convert this Identifier into a packed XML representation for storage."""
130
+ identifier_root = etree.Element("identifier")
131
+
132
+ namespace_attribute = etree.SubElement(identifier_root, "namespace")
133
+ namespace_attribute.text = self.schema.namespace
134
+
135
+ for attribute_name in IDENTIFIER_PACKABLE_ATTRIBUTES:
136
+ packed_attribute = etree.SubElement(identifier_root, attribute_name)
137
+ attribute_value = getattr(self, attribute_name)
138
+ if type(attribute_value) is bool:
139
+ packed_attribute.text = str(attribute_value).lower()
140
+ else:
141
+ packed_attribute.text = getattr(self, attribute_name)
142
+
143
+ return identifier_root
144
+
145
+ @property
146
+ def url_slug(self) -> str:
147
+ return self.schema.compile_identifier_url_slug(self.value)
148
+
149
+ @property
150
+ def score(self) -> float:
151
+ """Return the score of this identifier, used to calculate the preferred identifier for a document."""
152
+ return 1 * self.schema.base_score_multiplier
153
+
154
+ def same_as(self, other: "Identifier") -> bool:
155
+ "Is this the same as another identifier (in value and schema)?"
156
+ return self.value == other.value and self.schema == other.schema
157
+
158
+ def validate_require_globally_unique(self, api_client: "MarklogicApiClient") -> SuccessFailureMessageTuple:
159
+ """
160
+ Check against the list of identifiers in the database that this value does not currently exist.
161
+
162
+ nb: We don't need to check that the identifier value is unique within a parent `Identifiers` object, because `Identifiers.add()` will only allow one value per namespace.
163
+ """
164
+ resolutions = [
165
+ resolution
166
+ for resolution in api_client.resolve_from_identifier_value(identifier_value=self.value)
167
+ if resolution.identifier_namespace == self.schema.namespace
168
+ ]
169
+ if len(resolutions) > 0:
170
+ return SuccessFailureMessageTuple(
171
+ False,
172
+ [f'Identifiers in scheme "{self.schema.namespace}" must be unique; "{self.value}" already exists!'],
173
+ )
174
+
175
+ return SuccessFailureMessageTuple(True, [])
176
+
177
+ def validate_valid_for_document_type(self, document_type: type["Document"]) -> SuccessFailureMessageTuple:
178
+ document_type_classname = document_type.__name__
179
+
180
+ if self.schema.document_types and document_type_classname not in self.schema.document_types:
181
+ return SuccessFailureMessageTuple(
182
+ False,
183
+ [
184
+ f'Document type "{document_type_classname}" is not accepted for identifier schema "{self.schema.name}"'
185
+ ],
186
+ )
187
+
188
+ return SuccessFailureMessageTuple(True, [])
189
+
190
+ def perform_all_validations(
191
+ self, document_type: type["Document"], api_client: "MarklogicApiClient"
192
+ ) -> SuccessFailureMessageTuple:
193
+ """Perform all validations on a given identifier"""
194
+ validations = [
195
+ self.validate_require_globally_unique(api_client=api_client),
196
+ self.validate_valid_for_document_type(document_type=document_type),
197
+ ]
198
+
199
+ success = True
200
+ messages: list[str] = []
201
+
202
+ for validation in validations:
203
+ if validation.success is False:
204
+ success = False
205
+
206
+ messages += validation.messages
207
+
208
+ return SuccessFailureMessageTuple(success, messages)
@@ -0,0 +1,102 @@
1
+ from typing import TYPE_CHECKING, Optional, Union
2
+
3
+ from lxml import etree
4
+
5
+ from caselawclient.types import SuccessFailureMessageTuple
6
+
7
+ from . import Identifier
8
+ from .exceptions import UUIDMismatchError
9
+ from .fclid import FindCaseLawIdentifier
10
+ from .neutral_citation import NeutralCitationNumber
11
+ from .press_summary_ncn import PressSummaryRelatedNCNIdentifier
12
+
13
+ if TYPE_CHECKING:
14
+ from caselawclient.Client import MarklogicApiClient
15
+ from caselawclient.models.documents import Document
16
+
17
+ SUPPORTED_IDENTIFIER_TYPES: list[type["Identifier"]] = [
18
+ FindCaseLawIdentifier,
19
+ NeutralCitationNumber,
20
+ PressSummaryRelatedNCNIdentifier,
21
+ ]
22
+
23
+
24
class IdentifiersCollection(dict[str, Identifier]):
    """A dictionary of identifiers keyed by UUID, with helpers to validate, filter, serialise and rank them."""

    def validate_uuids_match_keys(self) -> None:
        """
        Check that every identifier is stored under its own UUID.

        :raises UUIDMismatchError: If any key differs from the ``uuid`` of the identifier stored under it.
        """
        for uuid, identifier in self.items():
            if uuid != identifier.uuid:
                # Must be an f-string: without the prefix the placeholders are raised verbatim.
                msg = f"Key of {identifier} in Identifiers is {uuid} not {identifier.uuid}"
                raise UUIDMismatchError(msg)

    def perform_all_validations(
        self, document_type: type["Document"], api_client: "MarklogicApiClient"
    ) -> SuccessFailureMessageTuple:
        """
        Validate the collection's structure, then every identifier it holds.

        :param document_type: The document class these identifiers belong to.
        :param api_client: Client passed through to each identifier's validations.

        :return: A result that is successful only if no identifier failed validation, with all identifiers' messages concatenated.

        :raises UUIDMismatchError: If a key does not match the UUID of its identifier; this is checked before any per-identifier validation runs.
        """
        self.validate_uuids_match_keys()

        success = True
        messages: list[str] = []

        for identifier in self.values():
            validations = identifier.perform_all_validations(document_type=document_type, api_client=api_client)
            if validations.success is False:
                success = False

            messages += validations.messages

        return SuccessFailureMessageTuple(success, messages)

    def contains(self, other_identifier: Identifier) -> bool:
        """Do the identifier's value and namespace already exist in this group? (Equivalence is decided by `Identifier.same_as`.)"""
        return any(other_identifier.same_as(identifier) for identifier in self.values())

    def add(self, identifier: Identifier) -> None:
        """Store the identifier under its UUID, unless an equivalent identifier is already present (in which case nothing happens)."""
        if not self.contains(identifier):
            self[identifier.uuid] = identifier

    def __delitem__(self, key: Union[Identifier, str]) -> None:
        """
        Delete an entry, addressed either by UUID string or by the identifier itself.

        :raises KeyError: If no entry exists under the resulting key.
        """
        if isinstance(key, Identifier):
            super().__delitem__(key.uuid)
        else:
            super().__delitem__(key)

    def of_type(self, identifier_type: type[Identifier]) -> list[Identifier]:
        """Return a list of all identifiers of a given type (subclass instances included, since this uses `isinstance`)."""
        return [identifier for identifier in self.values() if isinstance(identifier, identifier_type)]

    def delete_type(self, deleted_identifier_type: type[Identifier]) -> None:
        """For when we want an identifier to be the only valid identifier of that type, delete the others first."""
        # Snapshot the keys first: entries are removed while we iterate.
        for uuid in list(self.keys()):
            # We could compare against .schema instead, which would have different behaviour for subclasses.
            if isinstance(self[uuid], deleted_identifier_type):
                del self[uuid]

    @property
    def as_etree(self) -> etree._Element:
        """Return an etree representation of all the Document's identifiers, as children of an `identifiers` root element."""
        identifiers_root = etree.Element("identifiers")

        for identifier in self.values():
            identifiers_root.append(identifier.as_xml_tree)

        return identifiers_root

    def by_score(self, type: Optional[type[Identifier]] = None) -> list[Identifier]:
        """
        :param type: Optionally, an identifier type to constrain this list to.

        :return: Return a list of identifiers, sorted by their score in descending order.
        """
        identifiers = self.of_type(type) if type else list(self.values())
        return sorted(identifiers, key=lambda v: v.score, reverse=True)

    def preferred(self, type: Optional[type[Identifier]] = None) -> Optional[Identifier]:
        """
        :param type: Optionally, an identifier type to constrain the results to.

        :return: Return the highest scoring identifier of the given type (or of any type, if none is specified). Returns `None` if no identifier is available.
        """
        # Rank once and reuse the result rather than sorting twice.
        ranked = self.by_score(type)
        return ranked[0] if ranked else None
@@ -32,8 +32,11 @@ class FindCaseLawIdentifierSchema(IdentifierSchema):
32
32
  human_readable = False
33
33
  base_score_multiplier = 0.6
34
34
 
35
+ allow_editing = False
36
+ require_globally_unique = True
37
+
35
38
@classmethod
def validate_identifier_value(cls, value: str) -> bool:
    """Return whether ``value`` is matched by ``VALID_FCLID_PATTERN``."""
    # A successful match is always truthy, so testing against None is equivalent to bool().
    return VALID_FCLID_PATTERN.match(value) is not None
38
41
 
39
42
  @classmethod
@@ -48,8 +48,10 @@ class NeutralCitationNumberSchema(IdentifierSchema):
48
48
  human_readable = True
49
49
  base_score_multiplier = 1.5
50
50
 
51
+ document_types = ["Judgment"]
52
+
51
53
  @classmethod
52
- def validate_identifier(cls, value: str) -> bool:
54
+ def validate_identifier_value(cls, value: str) -> bool:
53
55
  # Quick check to see if the NCN matches the expected pattern
54
56
  if not bool(VALID_NCN_PATTERN.match(value)):
55
57
  raise NCNDoesNotMatchExpectedPatternException(f"NCN '{value}' is not in the expected format")
@@ -13,6 +13,8 @@ class PressSummaryRelatedNCNIdentifierSchema(NeutralCitationNumberSchema):
13
13
  human_readable = True
14
14
  base_score_multiplier = 0.8
15
15
 
16
+ document_types = ["PressSummary"]
17
+
16
18
@classmethod
def compile_identifier_url_slug(cls, value: str) -> DocumentIdentifierSlug:
    """Build the slug the parent schema produces for ``value`` and append ``/press-summary`` to it."""
    parent_slug = super().compile_identifier_url_slug(value)
    return DocumentIdentifierSlug(parent_slug + "/press-summary")