ds-caselaw-marklogic-api-client 40.0.0__tar.gz → 41.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/PKG-INFO +1 -1
  2. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/pyproject.toml +5 -5
  3. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/Client.py +9 -0
  4. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/documents/__init__.py +12 -2
  5. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/documents/body.py +50 -1
  6. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/documents/xml.py +4 -1
  7. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/identifiers/__init__.py +4 -1
  8. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/identifiers/collection.py +2 -0
  9. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/utilities/aws.py +15 -1
  10. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/types.py +7 -0
  11. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xml_helpers.py +18 -2
  12. ds_caselaw_marklogic_api_client-41.0.0/src/caselawclient/xquery/check_content_hash_unique_by_uri.xqy +9 -0
  13. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery_type_dicts.py +5 -0
  14. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/LICENSE.md +0 -0
  15. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/README.md +0 -0
  16. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/__init__.py +0 -0
  17. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/client_helpers/__init__.py +0 -0
  18. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/client_helpers/search_helpers.py +0 -0
  19. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/content_hash.py +0 -0
  20. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/errors.py +0 -0
  21. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/factories.py +0 -0
  22. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/identifier_resolution.py +0 -0
  23. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/__init__.py +0 -0
  24. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/documents/comparison.py +0 -0
  25. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/documents/exceptions.py +0 -0
  26. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/documents/statuses.py +0 -0
  27. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/documents/transforms/html.xsl +0 -0
  28. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/identifiers/exceptions.py +0 -0
  29. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/identifiers/fclid.py +0 -0
  30. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/identifiers/neutral_citation.py +0 -0
  31. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/identifiers/press_summary_ncn.py +0 -0
  32. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/identifiers/unpacker.py +0 -0
  33. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/judgments.py +0 -0
  34. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/neutral_citation_mixin.py +0 -0
  35. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/parser_logs.py +0 -0
  36. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/press_summaries.py +0 -0
  37. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/utilities/__init__.py +0 -0
  38. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/utilities/dates.py +0 -0
  39. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/models/utilities/move.py +0 -0
  40. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/py.typed +0 -0
  41. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/responses/__init__.py +0 -0
  42. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/responses/search_response.py +0 -0
  43. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/responses/search_result.py +0 -0
  44. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/responses/xsl/search_match.xsl +0 -0
  45. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/search_parameters.py +0 -0
  46. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/break_judgment_checkout.xqy +0 -0
  47. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/checkin_judgment.xqy +0 -0
  48. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/checkout_judgment.xqy +0 -0
  49. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/copy_document.xqy +0 -0
  50. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/delete_judgment.xqy +0 -0
  51. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/document_collections.xqy +0 -0
  52. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/document_exists.xqy +0 -0
  53. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_combined_stats_table.xqy +0 -0
  54. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_components_for_document.xqy +0 -0
  55. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_highest_enrichment_version.xqy +0 -0
  56. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_highest_parser_version.xqy +0 -0
  57. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_judgment.xqy +0 -0
  58. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_judgment_checkout_status.xqy +0 -0
  59. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_judgment_version.xqy +0 -0
  60. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_last_modified.xqy +0 -0
  61. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_missing_fclid.xqy +0 -0
  62. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_next_document_sequence_number.xqy +0 -0
  63. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy +0 -0
  64. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_pending_parse_for_version.xqy +0 -0
  65. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_properties_for_search_results.xqy +0 -0
  66. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_property.xqy +0 -0
  67. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_property_as_node.xqy +0 -0
  68. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_recently_enriched.xqy +0 -0
  69. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_recently_parsed.xqy +0 -0
  70. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_version_annotation.xqy +0 -0
  71. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/get_version_created.xqy +0 -0
  72. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/insert_document.xqy +0 -0
  73. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/list_judgment_versions.xqy +0 -0
  74. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/resolve_from_identifier_slug.xqy +0 -0
  75. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/resolve_from_identifier_value.xqy +0 -0
  76. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/set_boolean_property.xqy +0 -0
  77. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/set_datetime_property.xqy +0 -0
  78. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/set_metadata_citation.xqy +0 -0
  79. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/set_metadata_court.xqy +0 -0
  80. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/set_metadata_jurisdiction.xqy +0 -0
  81. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/set_metadata_name.xqy +0 -0
  82. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/set_metadata_this_uri.xqy +0 -0
  83. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/set_metadata_work_expression_date.xqy +0 -0
  84. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/set_property.xqy +0 -0
  85. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/set_property_as_node.xqy +0 -0
  86. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/update_document.xqy +0 -0
  87. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/update_locked_judgment.xqy +0 -0
  88. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/user_has_privilege.xqy +0 -0
  89. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/user_has_role.xqy +0 -0
  90. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/validate_all_documents.xqy +0 -0
  91. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/validate_document.xqy +0 -0
  92. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/xslt.xqy +0 -0
  93. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xquery/xslt_transform.xqy +0 -0
  94. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xslt/modify_xml_live.xsl +0 -0
  95. {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-41.0.0}/src/caselawclient/xslt/sample.xsl +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ds-caselaw-marklogic-api-client
3
- Version: 40.0.0
3
+ Version: 41.0.0
4
4
  Summary: An API client for interacting with the underlying data in Find Caselaw.
5
5
  Keywords: national archives,caselaw
6
6
  Author: The National Archives
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ds-caselaw-marklogic-api-client"
3
- version = "40.0.0"
3
+ version = "41.0.0"
4
4
  description = "An API client for interacting with the underlying data in Find Caselaw."
5
5
  authors = ["The National Archives"]
6
6
  homepage = "https://github.com/nationalarchives/ds-caselaw-custom-api-client"
@@ -32,14 +32,14 @@ sqids = "^0.5.0"
32
32
  defusedxml = "^0.7.1"
33
33
 
34
34
  [tool.poetry.group.dev.dependencies]
35
- coverage = "7.10.4"
35
+ coverage = "7.10.6"
36
36
  pytest = "8.4.1"
37
37
  pytest-cov = "6.2.1"
38
- beautifulsoup4 = "4.13.4"
38
+ beautifulsoup4 = "4.13.5"
39
39
  responses = "0.25.8"
40
40
  python-dotenv = "1.1.1"
41
- time-machine = "2.17.0"
42
- moto = {version = "5.1.10", extras = ["all"]}
41
+ time-machine = "2.19.0"
42
+ moto = {version = "5.1.11", extras = ["all"]}
43
43
 
44
44
  [tool.poetry.group.docs]
45
45
  optional = true
@@ -34,6 +34,7 @@ from caselawclient.models.utilities import move
34
34
  from caselawclient.search_parameters import SearchParameters
35
35
  from caselawclient.types import DocumentIdentifierSlug, DocumentIdentifierValue, DocumentURIString
36
36
  from caselawclient.xquery_type_dicts import (
37
+ CheckContentHashUniqueByUriDict,
37
38
  MarkLogicDocumentURIString,
38
39
  MarkLogicDocumentVersionURIString,
39
40
  MarkLogicPrivilegeURIString,
@@ -728,6 +729,14 @@ class MarklogicApiClient:
728
729
  == 0
729
730
  )
730
731
 
732
+ def has_unique_content_hash(self, judgment_uri: DocumentURIString) -> bool:
733
+ """
734
+ Returns True if the content hash for this document is unique (not shared with other documents).
735
+ """
736
+ uri = self._format_uri_for_marklogic(judgment_uri)
737
+ vars: CheckContentHashUniqueByUriDict = {"uri": uri}
738
+ return self._eval_and_decode(vars, "check_content_hash_unique_by_uri.xqy") == "true"
739
+
731
740
  def eval(
732
741
  self,
733
742
  xquery_path: str,
@@ -95,6 +95,11 @@ class Document:
95
95
  True,
96
96
  "The court for this {document_noun} is not valid",
97
97
  ),
98
+ (
99
+ "has_unique_content_hash",
100
+ True,
101
+ "There is another document with identical content",
102
+ ),
98
103
  ]
99
104
  """
100
105
  A list of tuples in the form:
@@ -325,6 +330,11 @@ class Document:
325
330
  def annotation(self) -> str:
326
331
  return self.api_client.get_version_annotation(self.uri)
327
332
 
333
+ @cached_property
334
+ def has_unique_content_hash(self) -> bool:
335
+ """Check if the content hash of this document is unique compared to all other documents in MarkLogic."""
336
+ return self.api_client.has_unique_content_hash(self.uri)
337
+
328
338
  @cached_property
329
339
  def version_created_datetime(self) -> datetime.datetime:
330
340
  return self.api_client.get_version_created_datetime(self.uri)
@@ -540,14 +550,14 @@ class Document:
540
550
  """
541
551
  Is it sensible to reparse this document?
542
552
  """
543
- return self.docx_exists()
553
+ return self.docx_exists() and not self.body.has_external_data
544
554
 
545
555
  @cached_property
546
556
  def can_enrich(self) -> bool:
547
557
  """
548
558
  Is it possible to enrich this document?
549
559
  """
550
- return self.body.has_content
560
+ return self.body.has_content and not self.body.has_external_data
551
561
 
552
562
  def validate_identifiers(self) -> SuccessFailureMessageTuple:
553
563
  return self.identifiers.perform_all_validations(document_type=type(self), api_client=self.api_client)
@@ -6,9 +6,11 @@ from typing import Optional
6
6
 
7
7
  import pytz
8
8
  from ds_caselaw_utils.types import CourtCode
9
+ from lxml import etree
9
10
  from saxonche import PySaxonProcessor
10
11
 
11
12
  from caselawclient.models.utilities.dates import parse_string_date_as_utc
13
+ from caselawclient.types import DocumentCategory
12
14
 
13
15
  from .xml import XML
14
16
 
@@ -37,6 +39,9 @@ class DocumentBody:
37
39
  def get_xpath_match_strings(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[str]:
38
40
  return self._xml.get_xpath_match_strings(xpath, namespaces)
39
41
 
42
+ def get_xpath_nodes(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[etree._Element]:
43
+ return self._xml.get_xpath_nodes(xpath, namespaces)
44
+
40
45
  @cached_property
41
46
  def name(self) -> str:
42
47
  return self.get_xpath_match_string(
@@ -51,9 +56,46 @@ class DocumentBody:
51
56
  def jurisdiction(self) -> str:
52
57
  return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()")
53
58
 
59
+ @cached_property
60
+ def categories(self) -> list[DocumentCategory]:
61
+ xpath = "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category"
62
+ nodes = self.get_xpath_nodes(xpath, DEFAULT_NAMESPACES)
63
+
64
+ categories: dict[str, DocumentCategory] = {}
65
+ children_map: dict[str, list[DocumentCategory]] = {}
66
+
67
+ for node in nodes:
68
+ name = node.text
69
+ if name is None or not name.strip():
70
+ continue
71
+
72
+ category = DocumentCategory(name=name)
73
+ categories[name] = category
74
+
75
+ parent = node.get("parent")
76
+
77
+ if parent:
78
+ children_map.setdefault(parent, []).append(category)
79
+
80
+ for parent, subcategories in children_map.items():
81
+ if parent in categories:
82
+ categories[parent].subcategories.extend(subcategories)
83
+
84
+ top_level_categories = [
85
+ categories[name]
86
+ for node in nodes
87
+ if node.get("parent") is None
88
+ if (name := node.text) and name in categories
89
+ ]
90
+
91
+ return top_level_categories
92
+
93
+ # NOTE: Deprecated - use categories function
54
94
  @cached_property
55
95
  def category(self) -> Optional[str]:
56
- return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category/text()")
96
+ return self.get_xpath_match_string(
97
+ "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category[not(@parent)][1]/text()"
98
+ )
57
99
 
58
100
  @cached_property
59
101
  def case_number(self) -> Optional[str]:
@@ -144,6 +186,13 @@ class DocumentBody:
144
186
 
145
187
  return False
146
188
 
189
+ @cached_property
190
+ def has_external_data(self) -> bool:
191
+ """Is there data which is not present within the source document:
192
+ is there a spreadsheet which has populated some fields. The current implementation
193
+ "is there a uk:party tag" is intended as a stopgap whilst we're not importing that data."""
194
+ return bool(self._xml.xml_as_tree.xpath("//uk:party", namespaces=DEFAULT_NAMESPACES))
195
+
147
196
  @cache
148
197
  def content_html(self, image_prefix: str) -> Optional[str]:
149
198
  """Convert the XML representation of the Document into HTML for rendering."""
@@ -2,7 +2,7 @@ import os
2
2
 
3
3
  from lxml import etree
4
4
 
5
- from caselawclient.xml_helpers import get_xpath_match_string, get_xpath_match_strings
5
+ from caselawclient.xml_helpers import get_xpath_match_string, get_xpath_match_strings, get_xpath_nodes
6
6
 
7
7
 
8
8
  def _xslt_path(xslt_file_name: str) -> str:
@@ -50,6 +50,9 @@ class XML:
50
50
  ) -> list[str]:
51
51
  return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
52
52
 
53
+ def get_xpath_nodes(self, xpath: str, namespaces: dict[str, str]) -> list[etree._Element]:
54
+ return get_xpath_nodes(self.xml_as_tree, xpath, namespaces)
55
+
53
56
  def _modified(
54
57
  self,
55
58
  xslt: str,
@@ -46,7 +46,10 @@ class IdentifierSchema(ABC):
46
46
  """ Should editors be allowed to manually manipulate identifiers under this schema? """
47
47
 
48
48
  require_globally_unique: bool = True
49
- """ Must this identifier be globally unique? """
49
+ """ Must this identifier be globally unique? (appear on no other documents) """
50
+
51
+ allow_multiple: bool = False
52
+ """ May documents have more than one non-deprecated identifier of this type? """
50
53
 
51
54
  document_types: Optional[list[str]] = None
52
55
  """
@@ -43,6 +43,8 @@ class IdentifiersCollection(dict[str, Identifier]):
43
43
  """Check that only one non-deprecated identifier exists per schema where that schema does not allow multiples."""
44
44
 
45
45
  for schema, identifiers in self._list_all_identifiers_by_schema().items():
46
+ if schema.allow_multiple:
47
+ continue
46
48
  non_deprecated_identifiers = [i for i in identifiers if not i.deprecated]
47
49
  if len(non_deprecated_identifiers) > 1:
48
50
  return SuccessFailureMessageTuple(
@@ -2,6 +2,7 @@ import datetime
2
2
  import json
3
3
  import logging
4
4
  import uuid
5
+ from collections.abc import Callable
5
6
  from typing import Any, Literal, Optional, TypedDict, overload
6
7
 
7
8
  import boto3
@@ -118,11 +119,20 @@ def generate_pdf_url(uri: DocumentURIString) -> str:
118
119
 
119
120
 
120
121
  def delete_from_bucket(uri: DocumentURIString, bucket: str) -> None:
122
+ delete_some_from_bucket(uri=uri, bucket=bucket, filter=lambda x: True)
123
+
124
+
125
+ def delete_some_from_bucket(
126
+ uri: DocumentURIString, bucket: str, filter: Callable[[ObjectIdentifierTypeDef], bool]
127
+ ) -> None:
121
128
  client = create_s3_client()
122
129
  response = client.list_objects(Bucket=bucket, Prefix=uri_for_s3(uri))
123
130
 
124
131
  if response.get("Contents"):
125
- objects_to_delete: list[ObjectIdentifierTypeDef] = [{"Key": obj["Key"]} for obj in response.get("Contents", [])]
132
+ objects_to_maybe_delete: list[ObjectIdentifierTypeDef] = [
133
+ {"Key": obj["Key"]} for obj in response.get("Contents", [])
134
+ ]
135
+ objects_to_delete = [obj for obj in objects_to_maybe_delete if filter(obj)]
126
136
  client.delete_objects(
127
137
  Bucket=bucket,
128
138
  Delete={
@@ -131,6 +141,10 @@ def delete_from_bucket(uri: DocumentURIString, bucket: str) -> None:
131
141
  )
132
142
 
133
143
 
144
+ def delete_non_targz_from_bucket(uri: DocumentURIString, bucket: str) -> None:
145
+ delete_some_from_bucket(uri=uri, bucket=bucket, filter=lambda x: not x["Key"].endswith(".tar.gz"))
146
+
147
+
134
148
  def publish_documents(uri: DocumentURIString) -> None:
135
149
  """
136
150
  Copy assets from the unpublished bucket to the published one.
@@ -1,6 +1,13 @@
1
+ from dataclasses import dataclass, field
1
2
  from typing import NamedTuple
2
3
 
3
4
 
5
+ @dataclass
6
+ class DocumentCategory:
7
+ name: str
8
+ subcategories: list["DocumentCategory"] = field(default_factory=list)
9
+
10
+
4
11
  class InvalidDocumentURIException(Exception):
5
12
  """The document URI is not valid."""
6
13
 
@@ -7,9 +7,25 @@ DEFAULT_NAMESPACES = {
7
7
  "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
8
8
  }
9
9
 
10
+ # _Element is the only class lxml exposes, so need to use the private class for typing
11
+ Element = etree._Element # noqa: SLF001
12
+
13
+
14
+ def get_xpath_nodes(
15
+ node: Element,
16
+ path: str,
17
+ namespaces: Optional[Dict[str, str]] = None,
18
+ ) -> list[Element]:
19
+ result = node.xpath(path, namespaces=namespaces)
20
+
21
+ if not isinstance(result, list) or not all(isinstance(x, Element) for x in result):
22
+ raise TypeError(f"Expected to return list[Element], got {type(result).__name__}")
23
+
24
+ return result
25
+
10
26
 
11
27
  def get_xpath_match_string(
12
- node: etree._Element,
28
+ node: Element,
13
29
  path: str,
14
30
  namespaces: Optional[Dict[str, str]] = None,
15
31
  fallback: str = "",
@@ -18,7 +34,7 @@ def get_xpath_match_string(
18
34
 
19
35
 
20
36
  def get_xpath_match_strings(
21
- node: etree._Element,
37
+ node: Element,
22
38
  path: str,
23
39
  namespaces: Optional[Dict[str, str]] = None,
24
40
  ) -> list[str]:
@@ -0,0 +1,9 @@
1
+ xquery version "1.0-ml";
2
+ declare namespace akn = "http://docs.oasis-open.org/legaldocml/ns/akn/3.0";
3
+ declare namespace uk = "https://caselaw.nationalarchives.gov.uk/akn";
4
+ declare variable $uri as xs:string external;
5
+
6
+ let $doc := doc($uri)
7
+ let $hash := $doc//uk:hash/text()
8
+ let $count := count(cts:search(fn:doc(), cts:element-value-query(xs:QName("uk:hash"), $hash)))
9
+ return $count = 1
@@ -23,6 +23,11 @@ class BreakJudgmentCheckoutDict(MarkLogicAPIDict):
23
23
  uri: MarkLogicDocumentURIString
24
24
 
25
25
 
26
+ # check_content_hash_unique_by_uri.xqy
27
+ class CheckContentHashUniqueByUriDict(MarkLogicAPIDict):
28
+ uri: MarkLogicDocumentURIString
29
+
30
+
26
31
  # checkin_judgment.xqy
27
32
  class CheckinJudgmentDict(MarkLogicAPIDict):
28
33
  uri: MarkLogicDocumentURIString