ds-caselaw-marklogic-api-client 43.0.0__tar.gz → 44.0.1__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ds-caselaw-marklogic-api-client might be problematic. Click here for more details.

Files changed (99)
  1. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/PKG-INFO +2 -2
  2. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/pyproject.toml +6 -6
  3. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/factories.py +1 -1
  4. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/documents/__init__.py +12 -1
  5. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/documents/body.py +1 -1
  6. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/utilities/aws.py +20 -0
  7. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_judgment.xqy +2 -12
  8. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/xslt_transform.xqy +4 -28
  9. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/LICENSE.md +0 -0
  10. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/README.md +0 -0
  11. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/Client.py +0 -0
  12. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/__init__.py +0 -0
  13. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/client_helpers/__init__.py +0 -0
  14. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/client_helpers/search_helpers.py +0 -0
  15. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/content_hash.py +0 -0
  16. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/errors.py +0 -0
  17. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/identifier_resolution.py +0 -0
  18. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/managers/__init__.py +0 -0
  19. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/managers/merge/__init__.py +0 -0
  20. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/managers/merge/checks.py +0 -0
  21. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/__init__.py +0 -0
  22. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/documents/comparison.py +0 -0
  23. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/documents/exceptions.py +0 -0
  24. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/documents/statuses.py +0 -0
  25. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/documents/transforms/html.xsl +0 -0
  26. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/documents/versions.py +0 -0
  27. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/documents/xml.py +0 -0
  28. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/identifiers/__init__.py +0 -0
  29. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/identifiers/collection.py +0 -0
  30. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/identifiers/exceptions.py +0 -0
  31. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/identifiers/fclid.py +0 -0
  32. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/identifiers/neutral_citation.py +0 -0
  33. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/identifiers/press_summary_ncn.py +0 -0
  34. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/identifiers/unpacker.py +0 -0
  35. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/judgments.py +0 -0
  36. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/neutral_citation_mixin.py +0 -0
  37. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/parser_logs.py +0 -0
  38. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/press_summaries.py +0 -0
  39. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/utilities/__init__.py +0 -0
  40. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/utilities/dates.py +0 -0
  41. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/utilities/move.py +0 -0
  42. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/py.typed +0 -0
  43. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/responses/__init__.py +0 -0
  44. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/responses/search_response.py +0 -0
  45. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/responses/search_result.py +0 -0
  46. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/responses/xsl/search_match.xsl +0 -0
  47. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/search_parameters.py +0 -0
  48. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/types.py +0 -0
  49. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xml_helpers.py +0 -0
  50. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/break_judgment_checkout.xqy +0 -0
  51. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/check_content_hash_unique_by_uri.xqy +0 -0
  52. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/checkin_judgment.xqy +0 -0
  53. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/checkout_judgment.xqy +0 -0
  54. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/copy_document.xqy +0 -0
  55. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/delete_judgment.xqy +0 -0
  56. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/document_collections.xqy +0 -0
  57. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/document_exists.xqy +0 -0
  58. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_combined_stats_table.xqy +0 -0
  59. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_components_for_document.xqy +0 -0
  60. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_highest_enrichment_version.xqy +0 -0
  61. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_highest_parser_version.xqy +0 -0
  62. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_judgment_checkout_status.xqy +0 -0
  63. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_judgment_version.xqy +0 -0
  64. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_last_modified.xqy +0 -0
  65. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_missing_fclid.xqy +0 -0
  66. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_next_document_sequence_number.xqy +0 -0
  67. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy +0 -0
  68. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_pending_parse_for_version.xqy +0 -0
  69. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_properties_for_search_results.xqy +0 -0
  70. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_property.xqy +0 -0
  71. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_property_as_node.xqy +0 -0
  72. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_recently_enriched.xqy +0 -0
  73. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_recently_parsed.xqy +0 -0
  74. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_version_annotation.xqy +0 -0
  75. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_version_created.xqy +0 -0
  76. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/insert_document.xqy +0 -0
  77. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/list_judgment_versions.xqy +0 -0
  78. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/resolve_from_identifier_slug.xqy +0 -0
  79. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/resolve_from_identifier_value.xqy +0 -0
  80. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_boolean_property.xqy +0 -0
  81. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_datetime_property.xqy +0 -0
  82. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_metadata_citation.xqy +0 -0
  83. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_metadata_court.xqy +0 -0
  84. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_metadata_jurisdiction.xqy +0 -0
  85. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_metadata_name.xqy +0 -0
  86. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_metadata_this_uri.xqy +0 -0
  87. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_metadata_work_expression_date.xqy +0 -0
  88. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_property.xqy +0 -0
  89. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_property_as_node.xqy +0 -0
  90. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/update_document.xqy +0 -0
  91. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/update_locked_judgment.xqy +0 -0
  92. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/user_has_privilege.xqy +0 -0
  93. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/user_has_role.xqy +0 -0
  94. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/validate_all_documents.xqy +0 -0
  95. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/validate_document.xqy +0 -0
  96. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/xslt.xqy +0 -0
  97. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery_type_dicts.py +0 -0
  98. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xslt/modify_xml_live.xsl +0 -0
  99. {ds_caselaw_marklogic_api_client-43.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xslt/sample.xsl +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ds-caselaw-marklogic-api-client
3
- Version: 43.0.0
3
+ Version: 44.0.1
4
4
  Summary: An API client for interacting with the underlying data in Find Caselaw.
5
5
  Keywords: national archives,caselaw
6
6
  Author: The National Archives
@@ -9,7 +9,7 @@ Classifier: Programming Language :: Python :: 3
9
9
  Classifier: Programming Language :: Python :: 3.12
10
10
  Classifier: Programming Language :: Python :: 3.13
11
11
  Requires-Dist: boto3 (>=1.26.112,<2.0.0)
12
- Requires-Dist: certifi (>=2025.10.5,<2025.11.0)
12
+ Requires-Dist: certifi (>=2025.11.12,<2025.12.0)
13
13
  Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
14
14
  Requires-Dist: defusedxml (>=0.7.1,<0.8.0)
15
15
  Requires-Dist: django-environ (>=0.12.0)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ds-caselaw-marklogic-api-client"
3
- version = "43.0.0"
3
+ version = "44.0.1"
4
4
  description = "An API client for interacting with the underlying data in Find Caselaw."
5
5
  authors = ["The National Archives"]
6
6
  homepage = "https://github.com/nationalarchives/ds-caselaw-custom-api-client"
@@ -12,7 +12,7 @@ packages = [
12
12
 
13
13
  [tool.poetry.dependencies]
14
14
  python = "^3.12.0"
15
- certifi = ">=2025.10.5,<2025.11.0"
15
+ certifi = ">=2025.11.12,<2025.12.0"
16
16
  charset-normalizer = "^3.0.0"
17
17
  django-environ = ">=0.12.0"
18
18
  idna = "^3.4"
@@ -33,20 +33,20 @@ defusedxml = "^0.7.1"
33
33
  pydantic = "^2.12.3"
34
34
 
35
35
  [tool.poetry.group.dev.dependencies]
36
- coverage = "7.11.0"
37
- pytest = "8.4.2"
36
+ coverage = "7.12.0"
37
+ pytest = "9.0.1"
38
38
  pytest-cov = "7.0.0"
39
39
  beautifulsoup4 = "4.14.2"
40
40
  responses = "0.25.8"
41
41
  python-dotenv = "1.2.1"
42
42
  time-machine = "2.19.0"
43
- moto = {version = "5.1.15", extras = ["all"]}
43
+ moto = {version = "5.1.17", extras = ["all"]}
44
44
 
45
45
  [tool.poetry.group.docs]
46
46
  optional = true
47
47
 
48
48
  [tool.poetry.group.docs.dependencies]
49
- pdoc = "^15.0.0"
49
+ pdoc = "^16.0.0"
50
50
 
51
51
 
52
52
  [tool.commitizen]
@@ -20,7 +20,7 @@ T = TypeVar("T")
20
20
 
21
21
  DEFAULT_DOCUMENT_BODY_XML = """<akomaNtoso xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0" xmlns:uk="https://caselaw.nationalarchives.gov.uk/akn">
22
22
  <judgment name="decision">
23
- <meta/><header/>
23
+ <meta/><header><p>Header contains text</p></header>
24
24
  <judgmentBody>
25
25
  <decision>
26
26
  <p>This is a document.</p>
@@ -26,6 +26,7 @@ from caselawclient.models.utilities import VersionsDict, extract_version, render
26
26
  from caselawclient.models.utilities.aws import (
27
27
  ParserInstructionsDict,
28
28
  announce_document_event,
29
+ are_unpublished_assets_clean,
29
30
  check_docx_exists,
30
31
  delete_documents_from_private_bucket,
31
32
  generate_docx_url,
@@ -102,6 +103,11 @@ class Document:
102
103
  True,
103
104
  "There is another document with identical content",
104
105
  ),
106
+ (
107
+ "has_only_clean_assets",
108
+ True,
109
+ "An uncleaned asset exists for this document",
110
+ ),
105
111
  ]
106
112
  """
107
113
  A list of tuples in the form:
@@ -252,7 +258,7 @@ class Document:
252
258
  """
253
259
  if self.is_version:
254
260
  raise NotSupportedOnVersion(
255
- "Cannot get versions of a version for {self.uri}",
261
+ f"Cannot get versions of a version for {self.uri}",
256
262
  )
257
263
  docs = []
258
264
  for version in self.versions:
@@ -370,6 +376,11 @@ class Document:
370
376
  """Check if the content hash of this document is unique compared to all other documents in MarkLogic."""
371
377
  return self.api_client.has_unique_content_hash(self.uri)
372
378
 
379
+ @cached_property
380
+ def has_only_clean_assets(self) -> bool:
381
+ """False if any non-tar.gz assets associated with this document have not been cleaned."""
382
+ return are_unpublished_assets_clean(self.uri)
383
+
373
384
  @cached_property
374
385
  def version_created_datetime(self) -> datetime.datetime:
375
386
  return self.api_client.get_version_created_datetime(self.uri)
@@ -177,7 +177,7 @@ class DocumentBody:
177
177
  def has_content(self) -> bool:
178
178
  """If we do not have a word document, the XML will not contain
179
179
  the contents of the judgment, but will contain a preamble."""
180
- trailing_tags = self._xml.xml_as_tree.xpath("//*[preceding::akn:meta]", namespaces=DEFAULT_NAMESPACES)
180
+ trailing_tags = self._xml.xml_as_tree.xpath("//akn:header/*", namespaces=DEFAULT_NAMESPACES)
181
181
  for tag in trailing_tags:
182
182
  if tag.tail and tag.tail.strip():
183
183
  return True
@@ -231,6 +231,26 @@ def copy_assets(old_uri: DocumentURIString, new_uri: DocumentURIString) -> None:
231
231
  )
232
232
 
233
233
 
234
+ def are_unpublished_assets_clean(uri: DocumentURIString) -> bool:
235
+ """Returns true if all non-tar.gz assets in the relevant S3 bucket have been cleaned
236
+ (they have a DOCUMENT_PROCESSOR_VERSION tag)
237
+ Note: if there are no assets, then this returns true."""
238
+ client = create_s3_client()
239
+ bucket = env("PRIVATE_ASSET_BUCKET")
240
+ response = client.list_objects(Bucket=bucket, Prefix=uri_for_s3(uri))
241
+ for result in response.get("Contents", []):
242
+ file_key = str(result["Key"])
243
+ # ignore original tar.gz files
244
+ if file_key.endswith(".tar.gz"):
245
+ continue
246
+
247
+ # check if assets are tagged as being processed by S3
248
+ tag_response = client.get_object_tagging(Bucket=bucket, Key=file_key)
249
+ if not (any(tag["Key"] == "DOCUMENT_PROCESSOR_VERSION" for tag in tag_response["TagSet"])):
250
+ return False
251
+ return True
252
+
253
+
234
254
  def build_new_key(old_key: str, new_uri: DocumentURIString) -> str:
235
255
  """Ensure that DOCX and PDF filenames are modified to reflect their new home
236
256
  as we get the name of the new S3 path"""
@@ -11,7 +11,7 @@ declare variable $version_uri as xs:string? external;
11
11
  declare variable $search_query as xs:string? external;
12
12
 
13
13
  (: Note that `xsl:output method` is changed from `html` to `xml` and we've namespaced the tag :)
14
- let $number_marks_xslt := (
14
+ let $delete_meta_marks_xslt := (
15
15
  <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
16
16
  xmlns:uk="https://caselaw.nationalarchives.gov.uk/akn"
17
17
  xmlns:akn="http://docs.oasis-open.org/legaldocml/ns/akn/3.0"
@@ -25,16 +25,6 @@ let $number_marks_xslt := (
25
25
  <xsl:template match="//akn:meta//uk:mark">
26
26
  <xsl:apply-templates />
27
27
  </xsl:template>
28
- <xsl:template match="uk:mark">
29
- <xsl:copy>
30
- <xsl:copy-of select="@*" />
31
- <xsl:attribute name="id">
32
- <xsl:text>mark_</xsl:text>
33
- <xsl:number count="//uk:mark" level="any" from="//*[ancestor::akn:meta]" />
34
- </xsl:attribute>
35
- <xsl:apply-templates />
36
- </xsl:copy>
37
- </xsl:template>
38
28
  </xsl:stylesheet>
39
29
  )
40
30
 
@@ -56,7 +46,7 @@ let $raw_xml := if ($show_unpublished) then
56
46
  (: If a search query string is present, highlight instances :)
57
47
  let $transformed := if($search_query) then
58
48
  xdmp:xslt-eval(
59
- $number_marks_xslt,
49
+ $delete_meta_marks_xslt,
60
50
  cts:highlight(
61
51
  $raw_xml,
62
52
  helper:make-q-query($search_query),
@@ -17,27 +17,6 @@ let $xsl_path := fn:concat("judgments/xslts/", $xsl_filename)
17
17
 
18
18
  let $params := map:map()
19
19
 
20
- let $number_marks_xslt := (
21
- <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
22
- version="2.0">
23
- <xsl:output method="html" />
24
- <xsl:template match="@*|node()">
25
- <xsl:copy>
26
- <xsl:apply-templates select="@*|node()"/>
27
- </xsl:copy>
28
- </xsl:template>
29
- <xsl:template match="mark">
30
- <xsl:copy>
31
- <xsl:copy-of select="*" />
32
- <xsl:attribute name="id">
33
- <xsl:text>mark_</xsl:text>
34
- <xsl:value-of select="count(preceding::mark)"/>
35
- </xsl:attribute>
36
- <xsl:apply-templates />
37
- </xsl:copy>
38
- </xsl:template>
39
- </xsl:stylesheet>
40
- )
41
20
  (: change the image-base of the document to match the location of the assets in $image_base
42
21
  so that references to images point to the correct places on the internet :)
43
22
  let $_put := map:put(
@@ -59,13 +38,10 @@ let $retrieved_value := if (xs:boolean($is_published) or $show_unpublished) then
59
38
  ()
60
39
 
61
40
  let $return_value := if($query) then
62
- xdmp:xslt-eval(
63
- $number_marks_xslt,
64
- cts:highlight(
65
- $retrieved_value,
66
- helper:make-q-query($query),
67
- <mark>{$cts:text}</mark>
68
- )
41
+ cts:highlight(
42
+ $retrieved_value,
43
+ helper:make-q-query($query),
44
+ <mark>{$cts:text}</mark>
69
45
  )
70
46
  else
71
47
  $retrieved_value