ds-caselaw-marklogic-api-client 25.0.0__tar.gz → 27.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ds-caselaw-marklogic-api-client might be problematic. (The original page links to more details; the link target is not preserved in this text.)

Files changed (71):
  1. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/PKG-INFO +2 -2
  2. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/pyproject.toml +13 -5
  3. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/Client.py +3 -10
  4. ds_caselaw_marklogic_api_client-27.0.0/src/caselawclient/factories.py +165 -0
  5. ds_caselaw_marklogic_api_client-25.0.0/src/caselawclient/models/documents.py → ds_caselaw_marklogic_api_client-27.0.0/src/caselawclient/models/documents/__init__.py +33 -226
  6. ds_caselaw_marklogic_api_client-27.0.0/src/caselawclient/models/documents/body.py +139 -0
  7. ds_caselaw_marklogic_api_client-27.0.0/src/caselawclient/models/documents/exceptions.py +6 -0
  8. ds_caselaw_marklogic_api_client-27.0.0/src/caselawclient/models/documents/statuses.py +12 -0
  9. ds_caselaw_marklogic_api_client-27.0.0/src/caselawclient/models/documents/xml.py +43 -0
  10. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/models/judgments.py +12 -10
  11. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/models/neutral_citation_mixin.py +7 -11
  12. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/models/press_summaries.py +11 -9
  13. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/models/utilities/aws.py +2 -0
  14. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/models/utilities/dates.py +1 -5
  15. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/models/utilities/move.py +12 -63
  16. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/responses/search_response.py +2 -2
  17. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/responses/search_result.py +6 -4
  18. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/LICENSE.md +0 -0
  19. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/README.md +0 -0
  20. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/__init__.py +0 -0
  21. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/client_helpers/__init__.py +0 -0
  22. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/client_helpers/search_helpers.py +0 -0
  23. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/content_hash.py +0 -0
  24. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/errors.py +0 -0
  25. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/models/__init__.py +0 -0
  26. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/models/utilities/__init__.py +0 -0
  27. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/py.typed +0 -0
  28. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/responses/__init__.py +0 -0
  29. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/responses/xsl/search_match.xsl +0 -0
  30. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/search_parameters.py +0 -0
  31. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xml_helpers.py +0 -0
  32. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/break_judgment_checkout.xqy +0 -0
  33. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/checkin_judgment.xqy +0 -0
  34. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/checkout_judgment.xqy +0 -0
  35. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/copy_document.xqy +0 -0
  36. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/delete_judgment.xqy +0 -0
  37. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/document_collections.xqy +0 -0
  38. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/document_exists.xqy +0 -0
  39. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/get_combined_stats_table.xqy +0 -0
  40. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/get_components_for_document.xqy +0 -0
  41. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/get_highest_enrichment_version.xqy +0 -0
  42. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/get_highest_parser_version.xqy +0 -0
  43. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/get_judgment.xqy +0 -0
  44. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/get_judgment_checkout_status.xqy +0 -0
  45. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/get_judgment_version.xqy +0 -0
  46. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/get_last_modified.xqy +0 -0
  47. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy +0 -0
  48. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/get_pending_parse_for_version.xqy +0 -0
  49. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/get_properties_for_search_results.xqy +0 -0
  50. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/get_property.xqy +0 -0
  51. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/get_version_annotation.xqy +0 -0
  52. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/get_version_created.xqy +0 -0
  53. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/insert_document.xqy +0 -0
  54. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/list_judgment_versions.xqy +0 -0
  55. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/set_boolean_property.xqy +0 -0
  56. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/set_metadata_citation.xqy +0 -0
  57. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/set_metadata_court.xqy +0 -0
  58. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/set_metadata_jurisdiction.xqy +0 -0
  59. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/set_metadata_name.xqy +0 -0
  60. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/set_metadata_this_uri.xqy +0 -0
  61. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/set_metadata_work_expression_date.xqy +0 -0
  62. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/set_property.xqy +0 -0
  63. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/update_document.xqy +0 -0
  64. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/update_locked_judgment.xqy +0 -0
  65. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/user_has_privilege.xqy +0 -0
  66. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/user_has_role.xqy +0 -0
  67. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/validate_all_documents.xqy +0 -0
  68. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/validate_document.xqy +0 -0
  69. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/xslt.xqy +0 -0
  70. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery/xslt_transform.xqy +0 -0
  71. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-27.0.0}/src/caselawclient/xquery_type_dicts.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ds-caselaw-marklogic-api-client
3
- Version: 25.0.0
3
+ Version: 27.0.0
4
4
  Summary: An API client for interacting with the underlying data in Find Caselaw.
5
5
  Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
6
6
  Keywords: national archives,caselaw
@@ -14,7 +14,7 @@ Requires-Dist: boto3 (>=1.26.112,<2.0.0)
14
14
  Requires-Dist: certifi (>=2024.8.30,<2024.9.0)
15
15
  Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
16
16
  Requires-Dist: django-environ (>=0.11.0,<0.12.0)
17
- Requires-Dist: ds-caselaw-utils (>=1.4.1,<2.0.0)
17
+ Requires-Dist: ds-caselaw-utils (>=2.0.0,<3.0.0)
18
18
  Requires-Dist: idna (>=3.4,<4.0)
19
19
  Requires-Dist: lxml (>=5.0.0,<6.0.0)
20
20
  Requires-Dist: memoization (>=0.4.0,<0.5.0)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ds-caselaw-marklogic-api-client"
3
- version = "25.0.0"
3
+ version = "27.0.0"
4
4
  description = "An API client for interacting with the underlying data in Find Caselaw."
5
5
  authors = ["The National Archives"]
6
6
  homepage = "https://github.com/nationalarchives/ds-caselaw-custom-api-client"
@@ -20,7 +20,7 @@ requests = "^2.28.2"
20
20
  requests-toolbelt = ">=0.10.1,<1.1.0"
21
21
  memoization = "^0.4.0"
22
22
  lxml = "^5.0.0"
23
- ds-caselaw-utils = "^1.4.1"
23
+ ds-caselaw-utils = "^2.0.0"
24
24
  boto3 = "^1.26.112"
25
25
  typing-extensions = "^4.7.1"
26
26
  mypy-boto3-s3 = "^1.26.104"
@@ -42,6 +42,13 @@ optional = true
42
42
  [tool.poetry.group.docs.dependencies]
43
43
  pdoc = "^14.0.0"
44
44
 
45
+
46
+ [tool.commitizen]
47
+ name = "cz_conventional_commits"
48
+ tag_format = "v$version"
49
+ version_scheme = "semver2"
50
+ version_provider = "poetry"
51
+ update_changelog_on_bump = true
45
52
  [build-system]
46
53
  requires = ["poetry-core"]
47
54
  build-backend = "poetry.core.masonry.api"
@@ -56,15 +63,16 @@ filterwarnings = ["ignore::DeprecationWarning"]
56
63
  line-length = 120
57
64
 
58
65
  [tool.ruff.lint]
59
- ignore = ["E501", "G004", "PLR2004", "RUF005", "RUF012", "UP040"] # long lines, fstrings in logs, magic values, consider not concat, mutable classbits, type instead of TypeAlias
60
- extend-select = ["W", "I"]
66
+ ignore = ["E501", "G004", "PLR2004", "RUF005", "RUF012", "UP040"] # longlines, fstrings in logs, magic values, consider not concat, mutable classbits, type instead of TypeAlias
67
+ extend-select = ["W", "I", "SLF", "SIM"]
61
68
  # extend-select = [ "B", "Q", "C90", "I", "UP", "YTT", "ASYNC", "S", "BLE", "A", "COM", "C4", "DTZ", "T10", "DJ", "EM", "EXE", "FA",
62
- # "ISC", "ICN", "G", "INP", "PIE", "T20", "PYI", "PT", "Q", "RSE", "RET", "SLF", "SLOT", "SIM", "TID", "TCH", "INT", "PTH",
69
+ # "ISC", "ICN", "G", "INP", "PIE", "T20", "PYI", "PT", "Q", "RSE", "RET", "SLOT", "TID", "TCH", "INT", "PTH",
63
70
  # "FIX", "PGH", "PL", "TRY", "FLY", "PERF", "RUF"]
64
71
  unfixable = ["ERA"]
65
72
 
66
73
  [tool.ruff.lint.extend-per-file-ignores]
67
74
  "tests/*" = ["S101"] # `assert` is fine in tests
75
+ "tests/client/test_client.py" = ["SLF001"] # TODO: This really shouldn't be the case, but it's not important to fix right now.
68
76
 
69
77
  # things skipped:
70
78
  # N: naming, possibly good
@@ -12,6 +12,7 @@ from xml.etree.ElementTree import Element, ParseError, fromstring
12
12
 
13
13
  import environ
14
14
  import requests
15
+ from ds_caselaw_utils.types import NeutralCitationString
15
16
  from requests.auth import HTTPBasicAuth
16
17
  from requests.structures import CaseInsensitiveDict
17
18
  from requests_toolbelt.multipart import decoder
@@ -800,10 +801,7 @@ class MarklogicApiClient:
800
801
  else None
801
802
  )
802
803
 
803
- if os.getenv("XSLT_IMAGE_LOCATION"):
804
- image_location = os.getenv("XSLT_IMAGE_LOCATION")
805
- else:
806
- image_location = ""
804
+ image_location = os.getenv("XSLT_IMAGE_LOCATION", "")
807
805
 
808
806
  show_unpublished = self.verify_show_unpublished(show_unpublished)
809
807
 
@@ -1038,12 +1036,7 @@ class MarklogicApiClient:
1038
1036
  search_parameters.collections = [DOCUMENT_COLLECTION_URI_JUDGMENT]
1039
1037
  return self.search_and_decode_response(search_parameters)
1040
1038
 
1041
- def overwrite_document(self, old_uri: str, new_citation: str) -> str:
1042
- """Move the judgment at old_uri on top of the new citation, which must already exist
1043
- Compare to update_document_uri"""
1044
- return move.overwrite_document(old_uri, new_citation, api_client=self)
1045
-
1046
- def update_document_uri(self, old_uri: str, new_citation: str) -> str:
1039
+ def update_document_uri(self, old_uri: DocumentURIString, new_citation: NeutralCitationString) -> DocumentURIString:
1047
1040
  """
1048
1041
  Move the document at old_uri to the correct location based on the neutral citation
1049
1042
  The new neutral citation *must* not already exist (that is handled elsewhere)
@@ -0,0 +1,165 @@
1
+ import datetime
2
+ from typing import Any, Optional, cast
3
+ from unittest.mock import Mock, patch
4
+
5
+ from typing_extensions import TypeAlias
6
+
7
+ from caselawclient.Client import MarklogicApiClient
8
+ from caselawclient.models.documents import Document
9
+ from caselawclient.models.documents.body import DocumentBody
10
+ from caselawclient.models.judgments import Judgment
11
+ from caselawclient.models.press_summaries import PressSummary
12
+ from caselawclient.responses.search_result import SearchResult, SearchResultMetadata
13
+
14
+ DEFAULT_DOCUMENT_BODY_XML = "<akomantoso>This is some XML of a judgment.</akomantoso>"
15
+
16
+
17
+ class DocumentBodyFactory:
18
+ # "name_of_attribute": "default value"
19
+ PARAMS_MAP: dict[str, Any] = {
20
+ "name": "Judgment v Judgement",
21
+ "court": "Court of Testing",
22
+ "document_date_as_string": "2023-02-03",
23
+ }
24
+
25
+ @classmethod
26
+ def build(cls, xml_string: str = DEFAULT_DOCUMENT_BODY_XML, **kwargs: Any) -> DocumentBody:
27
+ document_body = DocumentBody(
28
+ xml_bytestring=xml_string.encode(encoding="utf-8"),
29
+ )
30
+
31
+ for param_name, default_value in cls.PARAMS_MAP.items():
32
+ value = kwargs.get(param_name, default_value)
33
+ setattr(document_body, param_name, value)
34
+
35
+ return document_body
36
+
37
+
38
+ class DocumentFactory:
39
+ # "name_of_attribute": "default value"
40
+ PARAMS_MAP: dict[str, Any] = {
41
+ "is_published": False,
42
+ "is_sensitive": False,
43
+ "is_anonymised": False,
44
+ "is_failure": False,
45
+ "source_name": "Example Uploader",
46
+ "source_email": "uploader@example.com",
47
+ "consignment_reference": "TDR-12345",
48
+ "assigned_to": "",
49
+ "versions": [],
50
+ }
51
+
52
+ target_class: TypeAlias = Document
53
+
54
+ @classmethod
55
+ def build(
56
+ cls,
57
+ uri: str = "test/2023/123",
58
+ html: str = "<p>This is a judgment.</p>",
59
+ api_client: Optional[MarklogicApiClient] = None,
60
+ **kwargs: Any,
61
+ ) -> target_class:
62
+ if not api_client:
63
+ api_client = Mock(spec=MarklogicApiClient)
64
+ api_client.get_judgment_xml_bytestring.return_value = DEFAULT_DOCUMENT_BODY_XML.encode(encoding="utf-8")
65
+
66
+ with patch.object(cls.target_class, "content_as_html") as mock_content_as_html:
67
+ mock_content_as_html.return_value = html
68
+ document = cls.target_class(uri, api_client=api_client)
69
+
70
+ document.body = kwargs.pop("body") if "body" in kwargs else DocumentBodyFactory.build()
71
+
72
+ for param_name, default_value in cls.PARAMS_MAP.items():
73
+ value = kwargs.get(param_name, default_value)
74
+ setattr(document, param_name, value)
75
+
76
+ return document
77
+
78
+
79
+ class JudgmentFactory(DocumentFactory):
80
+ target_class = Judgment
81
+
82
+ def __init__(self) -> None:
83
+ self.PARAMS_MAP = self.PARAMS_MAP | {
84
+ "neutral_citation": "[2023] Test 123",
85
+ "best_human_identifier": "[2023] Test 123",
86
+ }
87
+
88
+ super().__init__()
89
+
90
+ @classmethod
91
+ def build(
92
+ cls,
93
+ uri: str = "test/2023/123",
94
+ html: str = "<p>This is a judgment.</p>",
95
+ api_client: Optional[MarklogicApiClient] = None,
96
+ **kwargs: Any,
97
+ ) -> Judgment:
98
+ return cast(Judgment, super().build(uri, html, api_client, **kwargs))
99
+
100
+
101
+ class PressSummaryFactory(DocumentFactory):
102
+ target_class = PressSummary
103
+
104
+ def __init__(self) -> None:
105
+ self.PARAMS_MAP = self.PARAMS_MAP | {
106
+ "neutral_citation": "[2023] Test 123",
107
+ "best_human_identifier": "[2023] Test 123",
108
+ }
109
+
110
+ super().__init__()
111
+
112
+ @classmethod
113
+ def build(
114
+ cls,
115
+ uri: str = "test/2023/123/press-summary/1",
116
+ html: str = "<p>This is a judgment.</p>",
117
+ api_client: Optional[MarklogicApiClient] = None,
118
+ **kwargs: Any,
119
+ ) -> PressSummary:
120
+ return cast(PressSummary, super().build(uri, html, api_client, **kwargs))
121
+
122
+
123
+ class SimpleFactory:
124
+ # "name_of_attribute": "default value"
125
+ PARAMS_MAP: dict[str, Any]
126
+
127
+ target_class: TypeAlias = object
128
+
129
+ @classmethod
130
+ def build(cls, **kwargs: Any) -> target_class:
131
+ mock_object = Mock(spec=cls.target_class, autospec=True)
132
+
133
+ for param, default in cls.PARAMS_MAP.items():
134
+ if param in kwargs:
135
+ setattr(mock_object.return_value, param, kwargs[param])
136
+ else:
137
+ setattr(mock_object.return_value, param, default)
138
+
139
+ return mock_object()
140
+
141
+
142
+ class SearchResultMetadataFactory(SimpleFactory):
143
+ target_class = SearchResultMetadata
144
+ # "name_of_attribute": "default value"
145
+ PARAMS_MAP = {
146
+ "author": "Fake Name",
147
+ "author_email": "fake.email@gov.invalid",
148
+ "consignment_reference": "TDR-2023-ABC",
149
+ "submission_datetime": datetime.datetime(2023, 2, 3, 9, 12, 34),
150
+ }
151
+
152
+
153
+ class SearchResultFactory(SimpleFactory):
154
+ target_class = SearchResult
155
+
156
+ # "name_of_attribute": ("name of incoming param", "default value")
157
+ PARAMS_MAP = {
158
+ "uri": "test/2023/123",
159
+ "name": "Judgment v Judgement",
160
+ "neutral_citation": "[2023] Test 123",
161
+ "court": "Court of Testing",
162
+ "date": datetime.date(2023, 2, 3),
163
+ "metadata": SearchResultMetadataFactory.build(),
164
+ "is_failure": False,
165
+ }
@@ -1,27 +1,22 @@
1
1
  import datetime
2
2
  import warnings
3
3
  from functools import cached_property
4
- from typing import TYPE_CHECKING, Any, Dict, NewType, Optional
4
+ from typing import TYPE_CHECKING, Any, NewType, Optional
5
5
 
6
- import pytz
7
6
  from ds_caselaw_utils import courts
8
7
  from ds_caselaw_utils.courts import CourtNotFoundException
9
- from lxml import etree
8
+ from ds_caselaw_utils.types import NeutralCitationString
10
9
  from lxml import html as html_parser
11
10
  from requests_toolbelt.multipart import decoder
12
11
 
13
- from caselawclient.models.utilities import extract_version
14
- from caselawclient.models.utilities.dates import parse_string_date_as_utc
15
-
16
- from ..errors import (
12
+ from caselawclient.errors import (
17
13
  DocumentNotFoundError,
18
14
  GatewayTimeoutError,
19
15
  NotSupportedOnVersion,
20
16
  OnlySupportedOnVersion,
21
17
  )
22
- from ..xml_helpers import get_xpath_match_string, get_xpath_match_strings
23
- from .utilities import VersionsDict, render_versions
24
- from .utilities.aws import (
18
+ from caselawclient.models.utilities import VersionsDict, extract_version, render_versions
19
+ from caselawclient.models.utilities.aws import (
25
20
  ParserInstructionsDict,
26
21
  announce_document_event,
27
22
  check_docx_exists,
@@ -34,31 +29,17 @@ from .utilities.aws import (
34
29
  uri_for_s3,
35
30
  )
36
31
 
37
- MINIMUM_ENRICHMENT_TIME = datetime.timedelta(minutes=20)
38
-
32
+ from .body import DocumentBody
33
+ from .exceptions import CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion
34
+ from .statuses import DOCUMENT_STATUS_HOLD, DOCUMENT_STATUS_IN_PROGRESS, DOCUMENT_STATUS_NEW, DOCUMENT_STATUS_PUBLISHED
39
35
 
40
- class UnparsableDate(Warning):
41
- pass
36
+ MINIMUM_ENRICHMENT_TIME = datetime.timedelta(minutes=20)
42
37
 
43
38
 
44
39
  class GatewayTimeoutGettingHTMLWithQuery(RuntimeWarning):
45
40
  pass
46
41
 
47
42
 
48
- DOCUMENT_STATUS_HOLD = "On hold"
49
- """ This document has been placed on hold to actively prevent publication. """
50
-
51
- DOCUMENT_STATUS_PUBLISHED = "Published"
52
- """ This document has been published and should be considered publicly visible. """
53
-
54
- DOCUMENT_STATUS_IN_PROGRESS = "In progress"
55
- """ This document has not been published or put on hold, and has been picked up by an editor and
56
- should be progressing through the document pipeline. """
57
-
58
- DOCUMENT_STATUS_NEW = "New"
59
- """ This document isn't published, on hold, or assigned, and can be picked up by an editor in the future. """
60
-
61
-
62
43
  DOCUMENT_COLLECTION_URI_JUDGMENT = "judgment"
63
44
  DOCUMENT_COLLECTION_URI_PRESS_SUMMARY = "press-summary"
64
45
 
@@ -67,19 +48,6 @@ if TYPE_CHECKING:
67
48
 
68
49
 
69
50
  DocumentURIString = NewType("DocumentURIString", str)
70
- CourtIdentifierString = NewType("CourtIdentifierString", str)
71
-
72
-
73
- class CannotPublishUnpublishableDocument(Exception):
74
- """A document which has failed publication safety checks in `Document.is_publishable` cannot be published."""
75
-
76
-
77
- class DocumentNotSafeForDeletion(Exception):
78
- """A document which is not safe for deletion cannot be deleted."""
79
-
80
-
81
- class NonXMLDocumentError(Exception):
82
- """A document cannot be parsed as XML."""
83
51
 
84
52
 
85
53
  class Document:
@@ -96,7 +64,7 @@ class Document:
96
64
 
97
65
  attributes_to_validate: list[tuple[str, bool, str]] = [
98
66
  (
99
- "failed_to_parse",
67
+ "is_failure",
100
68
  False,
101
69
  "This document failed to parse",
102
70
  ),
@@ -143,20 +111,18 @@ class Document:
143
111
 
144
112
  :raises DocumentNotFoundError: The document does not exist within MarkLogic
145
113
  """
146
- self.uri = DocumentURIString(uri.strip("/"))
147
- self.api_client = api_client
114
+ self.uri: DocumentURIString = DocumentURIString(uri.strip("/"))
115
+ self.api_client: MarklogicApiClient = api_client
148
116
  if not self.document_exists():
149
117
  raise DocumentNotFoundError(f"Document {self.uri} does not exist")
150
118
 
151
- self.xml = self.XML(
152
- xml_bytestring=self.api_client.get_judgment_xml_bytestring(
153
- self.uri,
154
- show_unpublished=True,
155
- ),
119
+ self.body: DocumentBody = DocumentBody(
120
+ xml_bytestring=self.api_client.get_judgment_xml_bytestring(self.uri, show_unpublished=True),
156
121
  )
122
+ """ `Document.body` represents the XML of the document itself, without any information such as version tracking or properties. """
157
123
 
158
124
  def __repr__(self) -> str:
159
- name = self.name or "un-named"
125
+ name = self.body.name or "un-named"
160
126
  return f"<{self.document_noun} {self.uri}: {name}>"
161
127
 
162
128
  def document_exists(self) -> bool:
@@ -186,104 +152,6 @@ class Document:
186
152
  """
187
153
  return f"https://caselaw.nationalarchives.gov.uk/{self.uri}"
188
154
 
189
- @cached_property
190
- def name(self) -> str:
191
- return self.xml.get_xpath_match_string(
192
- "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value",
193
- {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
194
- )
195
-
196
- @cached_property
197
- def court(self) -> str:
198
- return self.xml.get_xpath_match_string(
199
- "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()",
200
- {
201
- "uk": "https://caselaw.nationalarchives.gov.uk/akn",
202
- "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
203
- },
204
- )
205
-
206
- @cached_property
207
- def jurisdiction(self) -> str:
208
- return self.xml.get_xpath_match_string(
209
- "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()",
210
- {
211
- "uk": "https://caselaw.nationalarchives.gov.uk/akn",
212
- "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
213
- },
214
- )
215
-
216
- @property
217
- def court_and_jurisdiction_identifier_string(self) -> CourtIdentifierString:
218
- if self.jurisdiction != "":
219
- return CourtIdentifierString("/".join((self.court, self.jurisdiction)))
220
- return CourtIdentifierString(self.court)
221
-
222
- @cached_property
223
- def document_date_as_string(self) -> str:
224
- return self.xml.get_xpath_match_string(
225
- "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRdate/@date",
226
- {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
227
- )
228
-
229
- @cached_property
230
- def document_date_as_date(self) -> Optional[datetime.date]:
231
- if not self.document_date_as_string:
232
- return None
233
- try:
234
- return datetime.datetime.strptime(
235
- self.document_date_as_string,
236
- "%Y-%m-%d",
237
- ).date()
238
- except ValueError:
239
- warnings.warn(
240
- f"Unparsable date encountered: {self.document_date_as_string}",
241
- UnparsableDate,
242
- )
243
- return None
244
-
245
- def get_manifestation_datetimes(
246
- self,
247
- name: Optional[str] = None,
248
- ) -> list[datetime.datetime]:
249
- name_filter = f"[@name='{name}']" if name else ""
250
- iso_datetimes = self.xml.get_xpath_match_strings(
251
- "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRManifestation"
252
- f"/akn:FRBRdate{name_filter}/@date",
253
- {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
254
- )
255
-
256
- return [parse_string_date_as_utc(event, pytz.UTC) for event in iso_datetimes]
257
-
258
- def get_latest_manifestation_datetime(
259
- self,
260
- name: Optional[str] = None,
261
- ) -> Optional[datetime.datetime]:
262
- events = self.get_manifestation_datetimes(name)
263
- if not events:
264
- return None
265
- return max(events)
266
-
267
- def get_latest_manifestation_type(self) -> Optional[str]:
268
- return max(
269
- (
270
- (type, time)
271
- for type in ["transform", "tna-enriched"]
272
- if (time := self.get_latest_manifestation_datetime(type))
273
- ),
274
- key=lambda x: x[1],
275
- )[0]
276
-
277
- @cached_property
278
- def transformation_datetime(self) -> Optional[datetime.datetime]:
279
- """When was this document successfully parsed or reparsed (date from XML)"""
280
- return self.get_latest_manifestation_datetime("transform")
281
-
282
- @cached_property
283
- def enrichment_datetime(self) -> Optional[datetime.datetime]:
284
- """When was this document successfully enriched (date from XML)"""
285
- return self.get_latest_manifestation_datetime("tna-enriched")
286
-
287
155
  @cached_property
288
156
  def is_published(self) -> bool:
289
157
  return self.api_client.get_published(self.uri)
@@ -372,10 +240,6 @@ class Document:
372
240
  "Is this document a potentially historic version of a document, or is it the main document itself?"
373
241
  return extract_version(self.uri) != 0
374
242
 
375
- @cached_property
376
- def content_as_xml(self) -> str:
377
- return self.xml.xml_as_string
378
-
379
243
  def content_as_html(
380
244
  self,
381
245
  version_uri: Optional[DocumentURIString] = None,
@@ -418,39 +282,21 @@ class Document:
418
282
 
419
283
  :return: `True` if this document is in a 'failure' state, otherwise `False`
420
284
  """
421
- if self.failed_to_parse:
422
- return True
423
- return False
285
+ return self.body.failed_to_parse
424
286
 
425
287
  @cached_property
426
288
  def is_parked(self) -> bool:
427
- if "parked" in self.uri:
428
- return True
429
- return False
430
-
431
- @cached_property
432
- def failed_to_parse(self) -> bool:
433
- """
434
- Did this document entirely fail to parse?
435
-
436
- :return: `True` if there was a complete parser failure, otherwise `False`
437
- """
438
- if "error" in self.xml.root_element:
439
- return True
440
- return False
289
+ return "parked" in self.uri
441
290
 
442
291
  @cached_property
443
292
  def has_name(self) -> bool:
444
- if not self.name:
445
- return False
446
-
447
- return True
293
+ return bool(self.body.name)
448
294
 
449
295
  @cached_property
450
296
  def has_valid_court(self) -> bool:
451
297
  try:
452
298
  return bool(
453
- courts.get_by_code(self.court_and_jurisdiction_identifier_string),
299
+ courts.get_by_code(self.body.court_and_jurisdiction_identifier_string),
454
300
  )
455
301
  except CourtNotFoundException:
456
302
  return False
@@ -521,9 +367,7 @@ class Document:
521
367
  """
522
368
  Is it sensible to enrich this document?
523
369
  """
524
- if (self.enriched_recently is False) and self.validates_against_schema:
525
- return True
526
- return False
370
+ return (self.enriched_recently is False) and self.validates_against_schema
527
371
 
528
372
  @cached_property
529
373
  def enriched_recently(self) -> bool:
@@ -531,14 +375,13 @@ class Document:
531
375
  Has this document been enriched recently?
532
376
  """
533
377
 
534
- last_enrichment = self.enrichment_datetime
378
+ last_enrichment = self.body.enrichment_datetime
535
379
  if not last_enrichment:
536
380
  return False
537
381
 
538
382
  now = datetime.datetime.now(tz=datetime.timezone.utc)
539
- if now - last_enrichment < MINIMUM_ENRICHMENT_TIME:
540
- return True
541
- return False
383
+
384
+ return now - last_enrichment < MINIMUM_ENRICHMENT_TIME
542
385
 
543
386
  @cached_property
544
387
  def validates_against_schema(self) -> bool:
@@ -599,10 +442,7 @@ class Document:
599
442
  else:
600
443
  raise DocumentNotSafeForDeletion
601
444
 
602
- def overwrite(self, new_citation: str) -> None:
603
- self.api_client.overwrite_document(self.uri, new_citation)
604
-
605
- def move(self, new_citation: str) -> None:
445
+ def move(self, new_citation: NeutralCitationString) -> None:
606
446
  self.api_client.update_document_uri(self.uri, new_citation)
607
447
 
608
448
  def force_reparse(self) -> None:
@@ -612,7 +452,11 @@ class Document:
612
452
  self.api_client.set_property(self.uri, "last_sent_to_parser", now.isoformat())
613
453
 
614
454
  parser_type_noun = {"judgment": "judgment", "press summary": "pressSummary"}[self.document_noun]
615
- checked_date = self.document_date_as_string if self.document_date_as_string > "1001" else None
455
+ checked_date: Optional[str] = (
456
+ self.body.document_date_as_date.isoformat()
457
+ if self.body.document_date_as_date and self.body.document_date_as_date > datetime.date(1001, 1, 1)
458
+ else None
459
+ )
616
460
 
617
461
  # the keys of parser_instructions should exactly match the parser output
618
462
  # in the *-metadata.json files by the parser. Whilst typically empty
@@ -621,9 +465,9 @@ class Document:
621
465
  parser_instructions: ParserInstructionsDict = {
622
466
  "documentType": parser_type_noun,
623
467
  "metadata": {
624
- "name": self.name or None,
468
+ "name": self.body.name or None,
625
469
  "cite": self.best_human_identifier or None,
626
- "court": self.court or None,
470
+ "court": self.body.court or None,
627
471
  "date": checked_date,
628
472
  "uri": self.uri,
629
473
  },
@@ -650,41 +494,4 @@ class Document:
650
494
  """
651
495
  Is it sensible to reparse this document?
652
496
  """
653
- if self.docx_exists():
654
- return True
655
- return False
656
-
657
- class XML:
658
- """
659
- Represents the XML of a document, and should contain all methods for interacting with it.
660
- """
661
-
662
- def __init__(self, xml_bytestring: bytes):
663
- """
664
- :raises NonXMLDocumentError: This document is not valid XML
665
- """
666
- try:
667
- self.xml_as_tree: etree.Element = etree.fromstring(xml_bytestring)
668
- except etree.XMLSyntaxError:
669
- raise NonXMLDocumentError
670
-
671
- @property
672
- def xml_as_string(self) -> str:
673
- """
674
- :return: A string representation of this document's XML tree.
675
- """
676
- return str(etree.tostring(self.xml_as_tree).decode(encoding="utf-8"))
677
-
678
- @property
679
- def root_element(self) -> str:
680
- return str(self.xml_as_tree.tag)
681
-
682
- def get_xpath_match_string(self, xpath: str, namespaces: Dict[str, str]) -> str:
683
- return get_xpath_match_string(self.xml_as_tree, xpath, namespaces)
684
-
685
- def get_xpath_match_strings(
686
- self,
687
- xpath: str,
688
- namespaces: Dict[str, str],
689
- ) -> list[str]:
690
- return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
497
+ return self.docx_exists()