ds-caselaw-marklogic-api-client 24.0.1__tar.gz → 26.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ds-caselaw-marklogic-api-client might be problematic. Click here for more details.

Files changed (71)
  1. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/PKG-INFO +2 -2
  2. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/pyproject.toml +18 -6
  3. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/Client.py +28 -13
  4. ds_caselaw_marklogic_api_client-24.0.1/src/caselawclient/models/documents.py → ds_caselaw_marklogic_api_client-26.0.0/src/caselawclient/models/documents/__init__.py +27 -209
  5. ds_caselaw_marklogic_api_client-26.0.0/src/caselawclient/models/documents/body.py +142 -0
  6. ds_caselaw_marklogic_api_client-26.0.0/src/caselawclient/models/documents/exceptions.py +6 -0
  7. ds_caselaw_marklogic_api_client-26.0.0/src/caselawclient/models/documents/statuses.py +12 -0
  8. ds_caselaw_marklogic_api_client-26.0.0/src/caselawclient/models/documents/xml.py +43 -0
  9. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/models/judgments.py +1 -3
  10. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/models/press_summaries.py +1 -3
  11. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/models/utilities/aws.py +3 -2
  12. ds_caselaw_marklogic_api_client-24.0.1/src/caselawclient/xml_tools.py +0 -129
  13. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/LICENSE.md +0 -0
  14. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/README.md +0 -0
  15. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/__init__.py +0 -0
  16. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/client_helpers/__init__.py +0 -0
  17. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/client_helpers/search_helpers.py +0 -0
  18. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/content_hash.py +0 -0
  19. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/errors.py +0 -0
  20. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/models/__init__.py +0 -0
  21. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/models/neutral_citation_mixin.py +0 -0
  22. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/models/utilities/__init__.py +0 -0
  23. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/models/utilities/dates.py +0 -0
  24. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/models/utilities/move.py +0 -0
  25. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/py.typed +0 -0
  26. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/responses/__init__.py +0 -0
  27. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/responses/search_response.py +0 -0
  28. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/responses/search_result.py +0 -0
  29. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/responses/xsl/search_match.xsl +0 -0
  30. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/search_parameters.py +0 -0
  31. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xml_helpers.py +0 -0
  32. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/break_judgment_checkout.xqy +0 -0
  33. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/checkin_judgment.xqy +0 -0
  34. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/checkout_judgment.xqy +0 -0
  35. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/copy_document.xqy +0 -0
  36. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/delete_judgment.xqy +0 -0
  37. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/document_collections.xqy +0 -0
  38. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/document_exists.xqy +0 -0
  39. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_combined_stats_table.xqy +0 -0
  40. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_components_for_document.xqy +0 -0
  41. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_highest_enrichment_version.xqy +0 -0
  42. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_highest_parser_version.xqy +0 -0
  43. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_judgment.xqy +0 -0
  44. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_judgment_checkout_status.xqy +0 -0
  45. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_judgment_version.xqy +0 -0
  46. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_last_modified.xqy +0 -0
  47. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy +0 -0
  48. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_pending_parse_for_version.xqy +0 -0
  49. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_properties_for_search_results.xqy +0 -0
  50. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_property.xqy +0 -0
  51. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_version_annotation.xqy +0 -0
  52. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_version_created.xqy +0 -0
  53. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/insert_document.xqy +0 -0
  54. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/list_judgment_versions.xqy +0 -0
  55. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/set_boolean_property.xqy +0 -0
  56. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/set_metadata_citation.xqy +0 -0
  57. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/set_metadata_court.xqy +0 -0
  58. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/set_metadata_jurisdiction.xqy +0 -0
  59. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/set_metadata_name.xqy +0 -0
  60. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/set_metadata_this_uri.xqy +0 -0
  61. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/set_metadata_work_expression_date.xqy +0 -0
  62. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/set_property.xqy +0 -0
  63. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/update_document.xqy +0 -0
  64. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/update_locked_judgment.xqy +0 -0
  65. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/user_has_privilege.xqy +0 -0
  66. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/user_has_role.xqy +0 -0
  67. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/validate_all_documents.xqy +0 -0
  68. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/validate_document.xqy +0 -0
  69. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/xslt.xqy +0 -0
  70. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/xslt_transform.xqy +0 -0
  71. {ds_caselaw_marklogic_api_client-24.0.1 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery_type_dicts.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ds-caselaw-marklogic-api-client
3
- Version: 24.0.1
3
+ Version: 26.0.0
4
4
  Summary: An API client for interacting with the underlying data in Find Caselaw.
5
5
  Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
6
6
  Keywords: national archives,caselaw
@@ -11,7 +11,7 @@ Classifier: Programming Language :: Python :: 3.9
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
13
  Requires-Dist: boto3 (>=1.26.112,<2.0.0)
14
- Requires-Dist: certifi (>=2024.7.4,<2024.8.0)
14
+ Requires-Dist: certifi (>=2024.8.30,<2024.9.0)
15
15
  Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
16
16
  Requires-Dist: django-environ (>=0.11.0,<0.12.0)
17
17
  Requires-Dist: ds-caselaw-utils (>=1.4.1,<2.0.0)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ds-caselaw-marklogic-api-client"
3
- version = "24.0.1"
3
+ version = "26.0.0"
4
4
  description = "An API client for interacting with the underlying data in Find Caselaw."
5
5
  authors = ["The National Archives"]
6
6
  homepage = "https://github.com/nationalarchives/ds-caselaw-custom-api-client"
@@ -12,7 +12,7 @@ packages = [
12
12
 
13
13
  [tool.poetry.dependencies]
14
14
  python = "^3.9"
15
- certifi = ">=2024.7.4,<2024.8.0"
15
+ certifi = ">=2024.8.30,<2024.9.0"
16
16
  charset-normalizer = "^3.0.0"
17
17
  django-environ = "^0.11.0"
18
18
  idna = "^3.4"
@@ -42,6 +42,13 @@ optional = true
42
42
  [tool.poetry.group.docs.dependencies]
43
43
  pdoc = "^14.0.0"
44
44
 
45
+
46
+ [tool.commitizen]
47
+ name = "cz_conventional_commits"
48
+ tag_format = "v$version"
49
+ version_scheme = "semver2"
50
+ version_provider = "poetry"
51
+ update_changelog_on_bump = true
45
52
  [build-system]
46
53
  requires = ["poetry-core"]
47
54
  build-backend = "poetry.core.masonry.api"
@@ -56,12 +63,17 @@ filterwarnings = ["ignore::DeprecationWarning"]
56
63
  line-length = 120
57
64
 
58
65
  [tool.ruff.lint]
59
- ignore = ["E501", "G004", "PLR2004", "RUF005", "RUF012", "UP040"] # long lines, fstrings in logs, magic values, consider not concat, mutable classbits, type instead of TypeAlias
60
- extend-select = ["W", "B", "Q", "C90", "I", "UP", "YTT", "ASYNC", "S", "BLE", "A", "COM", "C4", "DTZ", "T10", "DJ", "EM", "EXE", "FA",
61
- "ISC", "ICN", "G", "INP", "PIE", "T20", "PYI", "PT", "Q", "RSE", "RET", "SLF", "SLOT", "SIM", "TID", "TCH", "INT", "PTH",
62
- "FIX", "PGH", "PL", "TRY", "FLY", "PERF", "RUF"]
66
+ ignore = ["E501", "G004", "PLR2004", "RUF005", "RUF012", "UP040"] # longlines, fstrings in logs, magic values, consider not concat, mutable classbits, type instead of TypeAlias
67
+ extend-select = ["W", "I", "SLF"]
68
+ # extend-select = [ "B", "Q", "C90", "I", "UP", "YTT", "ASYNC", "S", "BLE", "A", "COM", "C4", "DTZ", "T10", "DJ", "EM", "EXE", "FA",
69
+ # "ISC", "ICN", "G", "INP", "PIE", "T20", "PYI", "PT", "Q", "RSE", "RET", "SLOT", "SIM", "TID", "TCH", "INT", "PTH",
70
+ # "FIX", "PGH", "PL", "TRY", "FLY", "PERF", "RUF"]
63
71
  unfixable = ["ERA"]
64
72
 
73
+ [tool.ruff.lint.extend-per-file-ignores]
74
+ "tests/*" = ["S101"] # `assert` is fine in tests
75
+ "tests/client/test_client.py" = ["SLF001"] # TODO: This really shouldn't be the case, but it's not important to fix right now.
76
+
65
77
  # things skipped:
66
78
  # N: naming, possibly good
67
79
  # D: docstrings missing throughout
@@ -8,7 +8,7 @@ from datetime import datetime, time, timedelta
8
8
  from pathlib import Path
9
9
  from typing import Any, Optional, Type, Union
10
10
  from xml.etree import ElementTree
11
- from xml.etree.ElementTree import Element
11
+ from xml.etree.ElementTree import Element, ParseError, fromstring
12
12
 
13
13
  import environ
14
14
  import requests
@@ -34,7 +34,6 @@ from caselawclient.xquery_type_dicts import (
34
34
  MarkLogicPrivilegeURIString,
35
35
  )
36
36
 
37
- from . import xml_tools
38
37
  from .content_hash import validate_content_hash
39
38
  from .errors import (
40
39
  DocumentNotFoundError,
@@ -129,7 +128,7 @@ def get_single_string_from_marklogic_response(
129
128
  # relies on "" being falsy.
130
129
  return ""
131
130
 
132
- elif part_count > 1:
131
+ if part_count > 1:
133
132
  raise MultipartResponseLongerThanExpected(
134
133
  f"Response returned {part_count} multipart items, expected 1",
135
134
  )
@@ -148,7 +147,7 @@ def get_single_bytestring_from_marklogic_response(
148
147
  # relies on "" being falsy.
149
148
  return b""
150
149
 
151
- elif part_count > 1:
150
+ if part_count > 1:
152
151
  raise MultipartResponseLongerThanExpected(
153
152
  f"Response returned {part_count} multipart items, expected 1",
154
153
  )
@@ -231,12 +230,11 @@ class MarklogicApiClient:
231
230
 
232
231
  if DOCUMENT_COLLECTION_URI_JUDGMENT in collections:
233
232
  return Judgment
234
- elif DOCUMENT_COLLECTION_URI_PRESS_SUMMARY in collections:
233
+ if DOCUMENT_COLLECTION_URI_PRESS_SUMMARY in collections:
235
234
  return PressSummary
236
- else:
237
- raise DocumentHasNoTypeCollection(
238
- f"The document at URI {uri} is not part of a valid document type collection.",
239
- )
235
+ raise DocumentHasNoTypeCollection(
236
+ f"The document at URI {uri} is not part of a valid document type collection.",
237
+ )
240
238
 
241
239
  def _get_error_code_class(self, error_code: str) -> Type[MarklogicAPIError]:
242
240
  """
@@ -251,6 +249,23 @@ class MarklogicApiClient:
251
249
  def _path_to_request_url(self, path: str) -> str:
252
250
  return f"{self.base_url}/{path.lstrip('/')}"
253
251
 
252
+ @classmethod
253
+ def _get_error_code(cls, content_as_xml: Optional[str]) -> str:
254
+ logging.warning(
255
+ "XMLTools is deprecated and will be removed in later versions. "
256
+ "Use methods from MarklogicApiClient.Client instead.",
257
+ )
258
+ if not content_as_xml:
259
+ return "Unknown error, Marklogic returned a null or empty response"
260
+ try:
261
+ xml = fromstring(content_as_xml)
262
+ return xml.find(
263
+ "message-code",
264
+ namespaces={"": "http://marklogic.com/xdmp/error"},
265
+ ).text # type: ignore
266
+ except (ParseError, TypeError, AttributeError):
267
+ return "Unknown error, Marklogic returned a null or empty response"
268
+
254
269
  def _raise_for_status(self, response: requests.Response) -> None:
255
270
  try:
256
271
  response.raise_for_status()
@@ -269,7 +284,8 @@ class MarklogicApiClient:
269
284
 
270
285
  if new_error_class == self.default_http_error_class:
271
286
  # Attempt to decode the error code from the response
272
- error_code = xml_tools.get_error_code(response.content.decode("utf-8"))
287
+
288
+ error_code = self._get_error_code(response.content.decode("utf-8"))
273
289
 
274
290
  new_error_class = self._get_error_code_class(error_code)
275
291
 
@@ -498,9 +514,8 @@ class MarklogicApiClient:
498
514
  court, jurisdiction = re.split("\\s*/\\s*", content)
499
515
  self.set_document_court(document_uri, court)
500
516
  return self.set_document_jurisdiction(document_uri, jurisdiction)
501
- else:
502
- self.set_document_court(document_uri, content)
503
- return self.set_document_jurisdiction(document_uri, "")
517
+ self.set_document_court(document_uri, content)
518
+ return self.set_document_jurisdiction(document_uri, "")
504
519
 
505
520
  def set_judgment_this_uri(
506
521
  self,
@@ -1,27 +1,21 @@
1
1
  import datetime
2
2
  import warnings
3
3
  from functools import cached_property
4
- from typing import TYPE_CHECKING, Any, Dict, NewType, Optional
4
+ from typing import TYPE_CHECKING, Any, NewType, Optional
5
5
 
6
- import pytz
7
6
  from ds_caselaw_utils import courts
8
7
  from ds_caselaw_utils.courts import CourtNotFoundException
9
- from lxml import etree
10
8
  from lxml import html as html_parser
11
9
  from requests_toolbelt.multipart import decoder
12
10
 
13
- from caselawclient.models.utilities import extract_version
14
- from caselawclient.models.utilities.dates import parse_string_date_as_utc
15
-
16
- from ..errors import (
11
+ from caselawclient.errors import (
17
12
  DocumentNotFoundError,
18
13
  GatewayTimeoutError,
19
14
  NotSupportedOnVersion,
20
15
  OnlySupportedOnVersion,
21
16
  )
22
- from ..xml_helpers import get_xpath_match_string, get_xpath_match_strings
23
- from .utilities import VersionsDict, render_versions
24
- from .utilities.aws import (
17
+ from caselawclient.models.utilities import VersionsDict, extract_version, render_versions
18
+ from caselawclient.models.utilities.aws import (
25
19
  ParserInstructionsDict,
26
20
  announce_document_event,
27
21
  check_docx_exists,
@@ -34,31 +28,17 @@ from .utilities.aws import (
34
28
  uri_for_s3,
35
29
  )
36
30
 
37
- MINIMUM_ENRICHMENT_TIME = datetime.timedelta(minutes=20)
38
-
31
+ from .body import DocumentBody
32
+ from .exceptions import CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion
33
+ from .statuses import DOCUMENT_STATUS_HOLD, DOCUMENT_STATUS_IN_PROGRESS, DOCUMENT_STATUS_NEW, DOCUMENT_STATUS_PUBLISHED
39
34
 
40
- class UnparsableDate(Warning):
41
- pass
35
+ MINIMUM_ENRICHMENT_TIME = datetime.timedelta(minutes=20)
42
36
 
43
37
 
44
38
  class GatewayTimeoutGettingHTMLWithQuery(RuntimeWarning):
45
39
  pass
46
40
 
47
41
 
48
- DOCUMENT_STATUS_HOLD = "On hold"
49
- """ This document has been placed on hold to actively prevent publication. """
50
-
51
- DOCUMENT_STATUS_PUBLISHED = "Published"
52
- """ This document has been published and should be considered publicly visible. """
53
-
54
- DOCUMENT_STATUS_IN_PROGRESS = "In progress"
55
- """ This document has not been published or put on hold, and has been picked up by an editor and
56
- should be progressing through the document pipeline. """
57
-
58
- DOCUMENT_STATUS_NEW = "New"
59
- """ This document isn't published, on hold, or assigned, and can be picked up by an editor in the future. """
60
-
61
-
62
42
  DOCUMENT_COLLECTION_URI_JUDGMENT = "judgment"
63
43
  DOCUMENT_COLLECTION_URI_PRESS_SUMMARY = "press-summary"
64
44
 
@@ -67,19 +47,6 @@ if TYPE_CHECKING:
67
47
 
68
48
 
69
49
  DocumentURIString = NewType("DocumentURIString", str)
70
- CourtIdentifierString = NewType("CourtIdentifierString", str)
71
-
72
-
73
- class CannotPublishUnpublishableDocument(Exception):
74
- """A document which has failed publication safety checks in `Document.is_publishable` cannot be published."""
75
-
76
-
77
- class DocumentNotSafeForDeletion(Exception):
78
- """A document which is not safe for deletion cannot be deleted."""
79
-
80
-
81
- class NonXMLDocumentError(Exception):
82
- """A document cannot be parsed as XML."""
83
50
 
84
51
 
85
52
  class Document:
@@ -96,7 +63,7 @@ class Document:
96
63
 
97
64
  attributes_to_validate: list[tuple[str, bool, str]] = [
98
65
  (
99
- "failed_to_parse",
66
+ "is_failure",
100
67
  False,
101
68
  "This document failed to parse",
102
69
  ),
@@ -143,20 +110,18 @@ class Document:
143
110
 
144
111
  :raises DocumentNotFoundError: The document does not exist within MarkLogic
145
112
  """
146
- self.uri = DocumentURIString(uri.strip("/"))
147
- self.api_client = api_client
113
+ self.uri: DocumentURIString = DocumentURIString(uri.strip("/"))
114
+ self.api_client: MarklogicApiClient = api_client
148
115
  if not self.document_exists():
149
116
  raise DocumentNotFoundError(f"Document {self.uri} does not exist")
150
117
 
151
- self.xml = self.XML(
152
- xml_bytestring=self.api_client.get_judgment_xml_bytestring(
153
- self.uri,
154
- show_unpublished=True,
155
- ),
118
+ self.body: DocumentBody = DocumentBody(
119
+ xml_bytestring=self.api_client.get_judgment_xml_bytestring(self.uri, show_unpublished=True),
156
120
  )
121
+ """ `Document.body` represents the XML of the document itself, without any information such as version tracking or properties. """
157
122
 
158
123
  def __repr__(self) -> str:
159
- name = self.name or "un-named"
124
+ name = self.body.name or "un-named"
160
125
  return f"<{self.document_noun} {self.uri}: {name}>"
161
126
 
162
127
  def document_exists(self) -> bool:
@@ -186,106 +151,6 @@ class Document:
186
151
  """
187
152
  return f"https://caselaw.nationalarchives.gov.uk/{self.uri}"
188
153
 
189
- @cached_property
190
- def name(self) -> str:
191
- return self.xml.get_xpath_match_string(
192
- "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value",
193
- {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
194
- )
195
-
196
- @cached_property
197
- def court(self) -> str:
198
- return self.xml.get_xpath_match_string(
199
- "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()",
200
- {
201
- "uk": "https://caselaw.nationalarchives.gov.uk/akn",
202
- "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
203
- },
204
- )
205
-
206
- @cached_property
207
- def jurisdiction(self) -> str:
208
- return self.xml.get_xpath_match_string(
209
- "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()",
210
- {
211
- "uk": "https://caselaw.nationalarchives.gov.uk/akn",
212
- "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
213
- },
214
- )
215
-
216
- @property
217
- def court_and_jurisdiction_identifier_string(self) -> CourtIdentifierString:
218
- if self.jurisdiction != "":
219
- return CourtIdentifierString("/".join((self.court, self.jurisdiction)))
220
- else:
221
- return CourtIdentifierString(self.court)
222
-
223
- @cached_property
224
- def document_date_as_string(self) -> str:
225
- return self.xml.get_xpath_match_string(
226
- "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRdate/@date",
227
- {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
228
- )
229
-
230
- @cached_property
231
- def document_date_as_date(self) -> Optional[datetime.date]:
232
- if not self.document_date_as_string:
233
- return None
234
- try:
235
- return datetime.datetime.strptime(
236
- self.document_date_as_string,
237
- "%Y-%m-%d",
238
- ).date()
239
- except ValueError:
240
- warnings.warn(
241
- f"Unparsable date encountered: {self.document_date_as_string}",
242
- UnparsableDate,
243
- )
244
- return None
245
-
246
- def get_manifestation_datetimes(
247
- self,
248
- name: Optional[str] = None,
249
- ) -> list[datetime.datetime]:
250
- name_filter = f"[@name='{name}']" if name else ""
251
- iso_datetimes = self.xml.get_xpath_match_strings(
252
- "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRManifestation"
253
- f"/akn:FRBRdate{name_filter}/@date",
254
- {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
255
- )
256
-
257
- return [parse_string_date_as_utc(event, pytz.UTC) for event in iso_datetimes]
258
-
259
- def get_latest_manifestation_datetime(
260
- self,
261
- name: Optional[str] = None,
262
- ) -> Optional[datetime.datetime]:
263
- events = self.get_manifestation_datetimes(name)
264
- if not events:
265
- return None
266
- else:
267
- return max(events)
268
-
269
- def get_latest_manifestation_type(self) -> Optional[str]:
270
- return max(
271
- (
272
- (type, time)
273
- for type in ["transform", "tna-enriched"]
274
- if (time := self.get_latest_manifestation_datetime(type))
275
- ),
276
- key=lambda x: x[1],
277
- )[0]
278
-
279
- @cached_property
280
- def transformation_datetime(self) -> Optional[datetime.datetime]:
281
- """When was this document successfully parsed or reparsed (date from XML)"""
282
- return self.get_latest_manifestation_datetime("transform")
283
-
284
- @cached_property
285
- def enrichment_datetime(self) -> Optional[datetime.datetime]:
286
- """When was this document successfully enriched (date from XML)"""
287
- return self.get_latest_manifestation_datetime("tna-enriched")
288
-
289
154
  @cached_property
290
155
  def is_published(self) -> bool:
291
156
  return self.api_client.get_published(self.uri)
@@ -374,10 +239,6 @@ class Document:
374
239
  "Is this document a potentially historic version of a document, or is it the main document itself?"
375
240
  return extract_version(self.uri) != 0
376
241
 
377
- @cached_property
378
- def content_as_xml(self) -> str:
379
- return self.xml.xml_as_string
380
-
381
242
  def content_as_html(
382
243
  self,
383
244
  version_uri: Optional[DocumentURIString] = None,
@@ -404,8 +265,7 @@ class Document:
404
265
  GatewayTimeoutGettingHTMLWithQuery,
405
266
  )
406
267
  return self.content_as_html(version_uri)
407
- else:
408
- raise e
268
+ raise e
409
269
 
410
270
  def number_of_mentions(self, query: str) -> int:
411
271
  html = self.content_as_html(query=query)
@@ -421,7 +281,7 @@ class Document:
421
281
 
422
282
  :return: `True` if this document is in a 'failure' state, otherwise `False`
423
283
  """
424
- if self.failed_to_parse:
284
+ if self.body.failed_to_parse:
425
285
  return True
426
286
  return False
427
287
 
@@ -431,20 +291,9 @@ class Document:
431
291
  return True
432
292
  return False
433
293
 
434
- @cached_property
435
- def failed_to_parse(self) -> bool:
436
- """
437
- Did this document entirely fail to parse?
438
-
439
- :return: `True` if there was a complete parser failure, otherwise `False`
440
- """
441
- if "error" in self.xml.root_element:
442
- return True
443
- return False
444
-
445
294
  @cached_property
446
295
  def has_name(self) -> bool:
447
- if not self.name:
296
+ if not self.body.name:
448
297
  return False
449
298
 
450
299
  return True
@@ -453,7 +302,7 @@ class Document:
453
302
  def has_valid_court(self) -> bool:
454
303
  try:
455
304
  return bool(
456
- courts.get_by_code(self.court_and_jurisdiction_identifier_string),
305
+ courts.get_by_code(self.body.court_and_jurisdiction_identifier_string),
457
306
  )
458
307
  except CourtNotFoundException:
459
308
  return False
@@ -534,7 +383,7 @@ class Document:
534
383
  Has this document been enriched recently?
535
384
  """
536
385
 
537
- last_enrichment = self.enrichment_datetime
386
+ last_enrichment = self.body.enrichment_datetime
538
387
  if not last_enrichment:
539
388
  return False
540
389
 
@@ -615,7 +464,11 @@ class Document:
615
464
  self.api_client.set_property(self.uri, "last_sent_to_parser", now.isoformat())
616
465
 
617
466
  parser_type_noun = {"judgment": "judgment", "press summary": "pressSummary"}[self.document_noun]
618
- checked_date = self.document_date_as_string if self.document_date_as_string > "1001" else None
467
+ checked_date: Optional[str] = (
468
+ self.body.document_date_as_date.isoformat()
469
+ if self.body.document_date_as_date and self.body.document_date_as_date > datetime.date(1001, 1, 1)
470
+ else None
471
+ )
619
472
 
620
473
  # the keys of parser_instructions should exactly match the parser output
621
474
  # in the *-metadata.json files by the parser. Whilst typically empty
@@ -624,9 +477,9 @@ class Document:
624
477
  parser_instructions: ParserInstructionsDict = {
625
478
  "documentType": parser_type_noun,
626
479
  "metadata": {
627
- "name": self.name or None,
480
+ "name": self.body.name or None,
628
481
  "cite": self.best_human_identifier or None,
629
- "court": self.court or None,
482
+ "court": self.body.court or None,
630
483
  "date": checked_date,
631
484
  "uri": self.uri,
632
485
  },
@@ -656,38 +509,3 @@ class Document:
656
509
  if self.docx_exists():
657
510
  return True
658
511
  return False
659
-
660
- class XML:
661
- """
662
- Represents the XML of a document, and should contain all methods for interacting with it.
663
- """
664
-
665
- def __init__(self, xml_bytestring: bytes):
666
- """
667
- :raises NonXMLDocumentError: This document is not valid XML
668
- """
669
- try:
670
- self.xml_as_tree: etree.Element = etree.fromstring(xml_bytestring)
671
- except etree.XMLSyntaxError:
672
- raise NonXMLDocumentError
673
-
674
- @property
675
- def xml_as_string(self) -> str:
676
- """
677
- :return: A string representation of this document's XML tree.
678
- """
679
- return str(etree.tostring(self.xml_as_tree).decode(encoding="utf-8"))
680
-
681
- @property
682
- def root_element(self) -> str:
683
- return str(self.xml_as_tree.tag)
684
-
685
- def get_xpath_match_string(self, xpath: str, namespaces: Dict[str, str]) -> str:
686
- return get_xpath_match_string(self.xml_as_tree, xpath, namespaces)
687
-
688
- def get_xpath_match_strings(
689
- self,
690
- xpath: str,
691
- namespaces: Dict[str, str],
692
- ) -> list[str]:
693
- return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
@@ -0,0 +1,142 @@
1
+ import datetime
2
+ import warnings
3
+ from functools import cached_property
4
+ from typing import NewType, Optional
5
+
6
+ import pytz
7
+
8
+ from caselawclient.models.utilities.dates import parse_string_date_as_utc
9
+
10
+ from .xml import XML
11
+
12
+ CourtIdentifierString = NewType("CourtIdentifierString", str)
13
+
14
+
15
class UnparsableDate(Warning):
    """Warning category used when a document date string cannot be parsed as a date."""
17
+
18
+
19
class DocumentBody:
    """
    A class for abstracting out interactions with the body of a document.

    The body is held as an `XML` instance built from the document's bytes; the
    properties below read individual metadata values out of it with XPath.
    """

    def __init__(self, xml_bytestring: bytes):
        """
        :param xml_bytestring: The XML of the document, as bytes.

        :raises NonXMLDocumentError: Raised by `XML` if the bytes are not valid XML
        """
        self._xml = XML(xml_bytestring=xml_bytestring)
        """ This is an instance of the `XML` class for manipulation of the XML document itself. """

    def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str]) -> str:
        """
        Run an XPath query against this document's XML.

        :param xpath: The XPath expression to evaluate.
        :param namespaces: Mapping of namespace prefixes used in `xpath` to namespace URIs.

        :return: The string result of the underlying `XML.get_xpath_match_string` call.
        """
        return self._xml.get_xpath_match_string(xpath, namespaces)

    @cached_property
    def name(self) -> str:
        """The `@value` of the document's `FRBRWork/FRBRname` element."""
        return self._xml.get_xpath_match_string(
            "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value",
            {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
        )

    @cached_property
    def court(self) -> str:
        """The text of the `uk:court` element in the document's proprietary metadata."""
        return self._xml.get_xpath_match_string(
            "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()",
            {
                "uk": "https://caselaw.nationalarchives.gov.uk/akn",
                "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
            },
        )

    @cached_property
    def jurisdiction(self) -> str:
        """The text of the `uk:jurisdiction` element in the document's proprietary metadata."""
        return self._xml.get_xpath_match_string(
            "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()",
            {
                "uk": "https://caselaw.nationalarchives.gov.uk/akn",
                "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
            },
        )

    @property
    def court_and_jurisdiction_identifier_string(self) -> CourtIdentifierString:
        """
        :return: `court/jurisdiction` when a jurisdiction is present, otherwise just the court.
        """
        if self.jurisdiction != "":
            return CourtIdentifierString("/".join((self.court, self.jurisdiction)))
        return CourtIdentifierString(self.court)

    @cached_property
    def document_date_as_string(self) -> str:
        """The `@date` of the document's `FRBRWork/FRBRdate` element, unparsed."""
        return self._xml.get_xpath_match_string(
            "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRdate/@date",
            {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
        )

    @cached_property
    def document_date_as_date(self) -> Optional[datetime.date]:
        """
        The document date parsed as a `datetime.date`.

        :return: The parsed date, or `None` if the date string is empty or is not in
            `YYYY-MM-DD` form. In the latter case an `UnparsableDate` warning is emitted.
        """
        if not self.document_date_as_string:
            return None
        try:
            return datetime.datetime.strptime(
                self.document_date_as_string,
                "%Y-%m-%d",
            ).date()
        except ValueError:
            warnings.warn(
                f"Unparsable date encountered: {self.document_date_as_string}",
                UnparsableDate,
            )
            return None

    def get_manifestation_datetimes(
        self,
        name: Optional[str] = None,
    ) -> list[datetime.datetime]:
        """
        Collect the `FRBRManifestation/FRBRdate` values recorded in the document.

        :param name: If given, only `FRBRdate` elements whose `@name` equals this value are
            considered. The value is interpolated into the XPath unescaped, so it must not
            contain a single quote.

        :return: One datetime per matching `@date`, each produced by `parse_string_date_as_utc`.
        """
        name_filter = f"[@name='{name}']" if name else ""
        iso_datetimes = self._xml.get_xpath_match_strings(
            "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRManifestation"
            f"/akn:FRBRdate{name_filter}/@date",
            {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
        )

        return [parse_string_date_as_utc(event, pytz.UTC) for event in iso_datetimes]

    def get_latest_manifestation_datetime(
        self,
        name: Optional[str] = None,
    ) -> Optional[datetime.datetime]:
        """
        :param name: Optional `@name` filter, as for `get_manifestation_datetimes`.

        :return: The most recent matching manifestation datetime, or `None` if there are none.
        """
        return max(self.get_manifestation_datetimes(name), default=None)

    def get_latest_manifestation_type(self) -> Optional[str]:
        """
        Work out which kind of manifestation event happened most recently.

        :return: `"transform"` or `"tna-enriched"`, whichever has the later latest datetime
            (`"transform"` on a tie), or `None` if the document records neither.
        """
        latest = max(
            (
                (manifestation_type, timestamp)
                for manifestation_type in ["transform", "tna-enriched"]
                if (timestamp := self.get_latest_manifestation_datetime(manifestation_type))
            ),
            key=lambda pair: pair[1],
            # Without a default, max() raises ValueError when no manifestation dates exist.
            default=None,
        )
        return None if latest is None else latest[0]

    @cached_property
    def transformation_datetime(self) -> Optional[datetime.datetime]:
        """When was this document successfully parsed or reparsed (date from XML)"""
        return self.get_latest_manifestation_datetime("transform")

    @cached_property
    def enrichment_datetime(self) -> Optional[datetime.datetime]:
        """When was this document successfully enriched (date from XML)"""
        return self.get_latest_manifestation_datetime("tna-enriched")

    @cached_property
    def content_as_xml(self) -> str:
        """The document's XML tree serialised as a string."""
        return self._xml.xml_as_string

    @cached_property
    def failed_to_parse(self) -> bool:
        """
        Did this document entirely fail to parse?

        :return: `True` if there was a complete parser failure, otherwise `False`
        """
        # A complete failure is detected by "error" appearing in the root element's tag.
        return "error" in self._xml.root_element
@@ -0,0 +1,6 @@
1
class CannotPublishUnpublishableDocument(Exception):
    """
    Raised when publication is attempted on a document which has failed the
    publication safety checks in `Document.is_publishable`.
    """
3
+
4
+
5
class DocumentNotSafeForDeletion(Exception):
    """
    Raised when deletion is attempted on a document which is not safe to delete.
    """
@@ -0,0 +1,12 @@
1
+ DOCUMENT_STATUS_HOLD = "On hold"
2
+ """ This document has been placed on hold to actively prevent publication. """
3
+
4
+ DOCUMENT_STATUS_PUBLISHED = "Published"
5
+ """ This document has been published and should be considered publicly visible. """
6
+
7
+ DOCUMENT_STATUS_IN_PROGRESS = "In progress"
8
+ """ This document has not been published or put on hold, and has been picked up by an editor and
9
+ should be progressing through the document pipeline. """
10
+
11
+ DOCUMENT_STATUS_NEW = "New"
12
+ """ This document isn't published, on hold, or assigned, and can be picked up by an editor in the future. """
@@ -0,0 +1,43 @@
1
+ from lxml import etree
2
+
3
+ from caselawclient.xml_helpers import get_xpath_match_string, get_xpath_match_strings
4
+
5
+
6
class NonXMLDocumentError(Exception):
    """
    Raised when a document's content cannot be parsed as XML.
    """
8
+
9
+
10
class XML:
    """
    A class for interacting with the raw XML of a document.
    """

    def __init__(self, xml_bytestring: bytes):
        """
        Parse the given bytes into an element tree, stored as `xml_as_tree`.

        :param xml_bytestring: The XML of the document, as bytes.

        :raises NonXMLDocumentError: This document is not valid XML
        """
        try:
            self.xml_as_tree: etree.Element = etree.fromstring(xml_bytestring)
        except etree.XMLSyntaxError as syntax_error:
            # Chain explicitly so the parser's own message survives as __cause__.
            raise NonXMLDocumentError from syntax_error

    @property
    def xml_as_string(self) -> str:
        """
        :return: A string representation of this document's XML tree.
        """
        return str(etree.tostring(self.xml_as_tree).decode(encoding="utf-8"))

    @property
    def root_element(self) -> str:
        """
        :return: The tag of the root element of the XML tree, as a string.
        """
        return str(self.xml_as_tree.tag)

    def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str]) -> str:
        """
        Evaluate `xpath` against the tree via the `get_xpath_match_string` helper.

        :param xpath: The XPath expression to evaluate.
        :param namespaces: Mapping of namespace prefixes used in `xpath` to namespace URIs.

        :return: The string returned by the helper.
        """
        return get_xpath_match_string(self.xml_as_tree, xpath, namespaces)

    def get_xpath_match_strings(
        self,
        xpath: str,
        namespaces: dict[str, str],
    ) -> list[str]:
        """
        Evaluate `xpath` against the tree via the `get_xpath_match_strings` helper.

        :param xpath: The XPath expression to evaluate.
        :param namespaces: Mapping of namespace prefixes used in `xpath` to namespace URIs.

        :return: The list of strings returned by the helper.
        """
        return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
@@ -8,7 +8,6 @@ from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
8
8
  if TYPE_CHECKING:
9
9
  from caselawclient.models.press_summaries import PressSummary
10
10
 
11
- from ..xml_helpers import get_xpath_match_string
12
11
  from .documents import Document
13
12
 
14
13
 
@@ -25,8 +24,7 @@ class Judgment(NeutralCitationMixin, Document):
25
24
 
26
25
  @cached_property
27
26
  def neutral_citation(self) -> str:
28
- return get_xpath_match_string(
29
- self.xml.xml_as_tree,
27
+ return self.body.get_xpath_match_string(
30
28
  "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:cite/text()",
31
29
  {
32
30
  "uk": "https://caselaw.nationalarchives.gov.uk/akn",
@@ -6,7 +6,6 @@ from typing import TYPE_CHECKING, Any, Optional
6
6
 
7
7
  from caselawclient.errors import DocumentNotFoundError
8
8
  from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
9
- from caselawclient.xml_helpers import get_xpath_match_string
10
9
 
11
10
  from .documents import Document
12
11
 
@@ -27,8 +26,7 @@ class PressSummary(NeutralCitationMixin, Document):
27
26
 
28
27
  @cached_property
29
28
  def neutral_citation(self) -> str:
30
- return get_xpath_match_string(
31
- self.xml.xml_as_tree,
29
+ return self.body.get_xpath_match_string(
32
30
  "/akn:akomaNtoso/akn:doc/akn:preface/akn:p/akn:neutralCitation/text()",
33
31
  {
34
32
  "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
@@ -218,8 +218,7 @@ def build_new_key(old_key: str, new_uri: str) -> str:
218
218
  if old_filename.endswith(".docx") or old_filename.endswith(".pdf"):
219
219
  new_filename = new_uri.replace("/", "_")
220
220
  return f"{new_uri}/{new_filename}.{old_filename.split('.')[-1]}"
221
- else:
222
- return f"{new_uri}/{old_filename}"
221
+ return f"{new_uri}/{old_filename}"
223
222
 
224
223
 
225
224
  def request_parse(
@@ -250,6 +249,8 @@ def request_parse(
250
249
  },
251
250
  }
252
251
 
252
+ # breakpoint()
253
+
253
254
  client.publish(
254
255
  TopicArn=env("REPARSE_SNS_TOPIC"),
255
256
  Message=json.dumps(message_to_send),
@@ -1,129 +0,0 @@
1
- import logging
2
- from typing import List, Optional
3
- from xml.etree.ElementTree import (
4
- Element,
5
- ElementTree,
6
- ParseError,
7
- QName,
8
- fromstring,
9
- tostring,
10
- )
11
-
12
- akn_uk_namespaces = {
13
- "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
14
- "uk": "https://caselaw.nationalarchives.gov.uk/akn",
15
- }
16
- akn_namespace_uri = "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"
17
- uk_namespace_uri = "https://caselaw.nationalarchives.gov.uk/akn"
18
- search_namespace = {"search": "http://marklogic.com/appservices/search"}
19
-
20
-
21
- class JudgmentMissingMetadataError(IndexError):
22
- pass
23
-
24
-
25
- def get_element(
26
- xml: ElementTree,
27
- xpath: str,
28
- element_name: str = "FRBRname",
29
- element_namespace: str = akn_namespace_uri,
30
- has_value_attribute: bool = True,
31
- ) -> Element:
32
- logging.warning(
33
- "XMLTools is deprecated and will be removed in later versions. "
34
- "Use methods from MarklogicApiClient.Client instead.",
35
- )
36
- name = xml.find(
37
- xpath,
38
- namespaces=akn_uk_namespaces,
39
- )
40
-
41
- if name is None:
42
- element = Element(QName(element_namespace, element_name)) # type: ignore
43
- if has_value_attribute:
44
- element.set("value", "")
45
- return element
46
-
47
- return name
48
-
49
-
50
- def get_neutral_citation_element(xml: ElementTree) -> Element:
51
- return get_element(xml, ".//uk:cite", "cite", uk_namespace_uri, False)
52
-
53
-
54
- def get_neutral_citation_name_value(xml: ElementTree) -> Optional[str]:
55
- return get_neutral_citation_element(xml).text
56
-
57
-
58
- def get_judgment_date_element(xml: ElementTree) -> Element:
59
- logging.warning(
60
- "XMLTools is deprecated and will be removed in later versions. "
61
- "Use methods from MarklogicApiClient.Client instead.",
62
- )
63
- name = xml.find(
64
- ".//akn:FRBRWork/akn:FRBRdate",
65
- namespaces=akn_uk_namespaces,
66
- )
67
-
68
- if name is None:
69
- element = Element(QName(akn_namespace_uri, "FRBRdate")) # type: ignore
70
- element.set("date", "")
71
- element.set("name", "judgment")
72
-
73
- return element
74
-
75
- return name
76
-
77
-
78
- def get_judgment_date_value(xml: ElementTree) -> str:
79
- return get_judgment_date_element(xml).attrib["date"]
80
-
81
-
82
- def get_court_element(xml: ElementTree) -> Element:
83
- return get_element(xml, ".//uk:court", "court", uk_namespace_uri, False)
84
-
85
-
86
- def get_court_value(xml: ElementTree) -> Optional[str]:
87
- return get_court_element(xml).text
88
-
89
-
90
- def get_metadata_name_element(xml: ElementTree) -> Element:
91
- return get_element(xml, ".//akn:FRBRname", "FRBRname", akn_namespace_uri, True)
92
-
93
-
94
- def get_metadata_name_value(xml: ElementTree) -> str:
95
- name = get_metadata_name_element(xml)
96
- value = name.attrib["value"]
97
- if value is None:
98
- return ""
99
- return value
100
-
101
-
102
- def get_search_matches(element: ElementTree) -> List[str]:
103
- logging.warning(
104
- "XMLTools is deprecated and will be removed in later versions. "
105
- "Use methods from MarklogicApiClient.Client instead.",
106
- )
107
- nodes = element.findall(".//search:match", namespaces=search_namespace)
108
- results = []
109
- for node in nodes:
110
- text = tostring(node, method="text", encoding="UTF-8")
111
- results.append(text.decode("UTF-8").strip())
112
- return results
113
-
114
-
115
- def get_error_code(content_as_xml: Optional[str]) -> str:
116
- logging.warning(
117
- "XMLTools is deprecated and will be removed in later versions. "
118
- "Use methods from MarklogicApiClient.Client instead.",
119
- )
120
- if not content_as_xml:
121
- return "Unknown error, Marklogic returned a null or empty response"
122
- try:
123
- xml = fromstring(content_as_xml)
124
- return xml.find(
125
- "message-code",
126
- namespaces={"": "http://marklogic.com/xdmp/error"},
127
- ).text # type: ignore
128
- except (ParseError, TypeError, AttributeError):
129
- return "Unknown error, Marklogic returned a null or empty response"