ds-caselaw-marklogic-api-client 25.0.0__tar.gz → 26.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ds-caselaw-marklogic-api-client might be problematic; further details were linked from the original page.

Files changed (70)
  1. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/PKG-INFO +1 -1
  2. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/pyproject.toml +12 -4
  3. ds_caselaw_marklogic_api_client-25.0.0/src/caselawclient/models/documents.py → ds_caselaw_marklogic_api_client-26.0.0/src/caselawclient/models/documents/__init__.py +26 -205
  4. ds_caselaw_marklogic_api_client-26.0.0/src/caselawclient/models/documents/body.py +142 -0
  5. ds_caselaw_marklogic_api_client-26.0.0/src/caselawclient/models/documents/exceptions.py +6 -0
  6. ds_caselaw_marklogic_api_client-26.0.0/src/caselawclient/models/documents/statuses.py +12 -0
  7. ds_caselaw_marklogic_api_client-26.0.0/src/caselawclient/models/documents/xml.py +43 -0
  8. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/models/judgments.py +1 -3
  9. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/models/press_summaries.py +1 -3
  10. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/models/utilities/aws.py +2 -0
  11. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/LICENSE.md +0 -0
  12. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/README.md +0 -0
  13. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/Client.py +0 -0
  14. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/__init__.py +0 -0
  15. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/client_helpers/__init__.py +0 -0
  16. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/client_helpers/search_helpers.py +0 -0
  17. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/content_hash.py +0 -0
  18. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/errors.py +0 -0
  19. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/models/__init__.py +0 -0
  20. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/models/neutral_citation_mixin.py +0 -0
  21. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/models/utilities/__init__.py +0 -0
  22. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/models/utilities/dates.py +0 -0
  23. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/models/utilities/move.py +0 -0
  24. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/py.typed +0 -0
  25. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/responses/__init__.py +0 -0
  26. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/responses/search_response.py +0 -0
  27. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/responses/search_result.py +0 -0
  28. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/responses/xsl/search_match.xsl +0 -0
  29. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/search_parameters.py +0 -0
  30. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xml_helpers.py +0 -0
  31. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/break_judgment_checkout.xqy +0 -0
  32. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/checkin_judgment.xqy +0 -0
  33. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/checkout_judgment.xqy +0 -0
  34. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/copy_document.xqy +0 -0
  35. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/delete_judgment.xqy +0 -0
  36. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/document_collections.xqy +0 -0
  37. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/document_exists.xqy +0 -0
  38. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_combined_stats_table.xqy +0 -0
  39. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_components_for_document.xqy +0 -0
  40. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_highest_enrichment_version.xqy +0 -0
  41. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_highest_parser_version.xqy +0 -0
  42. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_judgment.xqy +0 -0
  43. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_judgment_checkout_status.xqy +0 -0
  44. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_judgment_version.xqy +0 -0
  45. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_last_modified.xqy +0 -0
  46. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy +0 -0
  47. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_pending_parse_for_version.xqy +0 -0
  48. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_properties_for_search_results.xqy +0 -0
  49. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_property.xqy +0 -0
  50. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_version_annotation.xqy +0 -0
  51. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/get_version_created.xqy +0 -0
  52. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/insert_document.xqy +0 -0
  53. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/list_judgment_versions.xqy +0 -0
  54. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/set_boolean_property.xqy +0 -0
  55. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/set_metadata_citation.xqy +0 -0
  56. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/set_metadata_court.xqy +0 -0
  57. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/set_metadata_jurisdiction.xqy +0 -0
  58. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/set_metadata_name.xqy +0 -0
  59. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/set_metadata_this_uri.xqy +0 -0
  60. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/set_metadata_work_expression_date.xqy +0 -0
  61. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/set_property.xqy +0 -0
  62. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/update_document.xqy +0 -0
  63. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/update_locked_judgment.xqy +0 -0
  64. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/user_has_privilege.xqy +0 -0
  65. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/user_has_role.xqy +0 -0
  66. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/validate_all_documents.xqy +0 -0
  67. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/validate_document.xqy +0 -0
  68. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/xslt.xqy +0 -0
  69. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery/xslt_transform.xqy +0 -0
  70. {ds_caselaw_marklogic_api_client-25.0.0 → ds_caselaw_marklogic_api_client-26.0.0}/src/caselawclient/xquery_type_dicts.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ds-caselaw-marklogic-api-client
3
- Version: 25.0.0
3
+ Version: 26.0.0
4
4
  Summary: An API client for interacting with the underlying data in Find Caselaw.
5
5
  Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
6
6
  Keywords: national archives,caselaw
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ds-caselaw-marklogic-api-client"
3
- version = "25.0.0"
3
+ version = "26.0.0"
4
4
  description = "An API client for interacting with the underlying data in Find Caselaw."
5
5
  authors = ["The National Archives"]
6
6
  homepage = "https://github.com/nationalarchives/ds-caselaw-custom-api-client"
@@ -42,6 +42,13 @@ optional = true
42
42
  [tool.poetry.group.docs.dependencies]
43
43
  pdoc = "^14.0.0"
44
44
 
45
+
46
+ [tool.commitizen]
47
+ name = "cz_conventional_commits"
48
+ tag_format = "v$version"
49
+ version_scheme = "semver2"
50
+ version_provider = "poetry"
51
+ update_changelog_on_bump = true
45
52
  [build-system]
46
53
  requires = ["poetry-core"]
47
54
  build-backend = "poetry.core.masonry.api"
@@ -56,15 +63,16 @@ filterwarnings = ["ignore::DeprecationWarning"]
56
63
  line-length = 120
57
64
 
58
65
  [tool.ruff.lint]
59
- ignore = ["E501", "G004", "PLR2004", "RUF005", "RUF012", "UP040"] # long lines, fstrings in logs, magic values, consider not concat, mutable classbits, type instead of TypeAlias
60
- extend-select = ["W", "I"]
66
+ ignore = ["E501", "G004", "PLR2004", "RUF005", "RUF012", "UP040"] # longlines, fstrings in logs, magic values, consider not concat, mutable classbits, type instead of TypeAlias
67
+ extend-select = ["W", "I", "SLF"]
61
68
  # extend-select = [ "B", "Q", "C90", "I", "UP", "YTT", "ASYNC", "S", "BLE", "A", "COM", "C4", "DTZ", "T10", "DJ", "EM", "EXE", "FA",
62
- # "ISC", "ICN", "G", "INP", "PIE", "T20", "PYI", "PT", "Q", "RSE", "RET", "SLF", "SLOT", "SIM", "TID", "TCH", "INT", "PTH",
69
+ # "ISC", "ICN", "G", "INP", "PIE", "T20", "PYI", "PT", "Q", "RSE", "RET", "SLOT", "SIM", "TID", "TCH", "INT", "PTH",
63
70
  # "FIX", "PGH", "PL", "TRY", "FLY", "PERF", "RUF"]
64
71
  unfixable = ["ERA"]
65
72
 
66
73
  [tool.ruff.lint.extend-per-file-ignores]
67
74
  "tests/*" = ["S101"] # `assert` is fine in tests
75
+ "tests/client/test_client.py" = ["SLF001"] # TODO: This really shouldn't be the case, but it's not important to fix right now.
68
76
 
69
77
  # things skipped:
70
78
  # N: naming, possibly good
@@ -1,27 +1,21 @@
1
1
  import datetime
2
2
  import warnings
3
3
  from functools import cached_property
4
- from typing import TYPE_CHECKING, Any, Dict, NewType, Optional
4
+ from typing import TYPE_CHECKING, Any, NewType, Optional
5
5
 
6
- import pytz
7
6
  from ds_caselaw_utils import courts
8
7
  from ds_caselaw_utils.courts import CourtNotFoundException
9
- from lxml import etree
10
8
  from lxml import html as html_parser
11
9
  from requests_toolbelt.multipart import decoder
12
10
 
13
- from caselawclient.models.utilities import extract_version
14
- from caselawclient.models.utilities.dates import parse_string_date_as_utc
15
-
16
- from ..errors import (
11
+ from caselawclient.errors import (
17
12
  DocumentNotFoundError,
18
13
  GatewayTimeoutError,
19
14
  NotSupportedOnVersion,
20
15
  OnlySupportedOnVersion,
21
16
  )
22
- from ..xml_helpers import get_xpath_match_string, get_xpath_match_strings
23
- from .utilities import VersionsDict, render_versions
24
- from .utilities.aws import (
17
+ from caselawclient.models.utilities import VersionsDict, extract_version, render_versions
18
+ from caselawclient.models.utilities.aws import (
25
19
  ParserInstructionsDict,
26
20
  announce_document_event,
27
21
  check_docx_exists,
@@ -34,31 +28,17 @@ from .utilities.aws import (
34
28
  uri_for_s3,
35
29
  )
36
30
 
37
- MINIMUM_ENRICHMENT_TIME = datetime.timedelta(minutes=20)
38
-
31
+ from .body import DocumentBody
32
+ from .exceptions import CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion
33
+ from .statuses import DOCUMENT_STATUS_HOLD, DOCUMENT_STATUS_IN_PROGRESS, DOCUMENT_STATUS_NEW, DOCUMENT_STATUS_PUBLISHED
39
34
 
40
- class UnparsableDate(Warning):
41
- pass
35
+ MINIMUM_ENRICHMENT_TIME = datetime.timedelta(minutes=20)
42
36
 
43
37
 
44
38
  class GatewayTimeoutGettingHTMLWithQuery(RuntimeWarning):
45
39
  pass
46
40
 
47
41
 
48
- DOCUMENT_STATUS_HOLD = "On hold"
49
- """ This document has been placed on hold to actively prevent publication. """
50
-
51
- DOCUMENT_STATUS_PUBLISHED = "Published"
52
- """ This document has been published and should be considered publicly visible. """
53
-
54
- DOCUMENT_STATUS_IN_PROGRESS = "In progress"
55
- """ This document has not been published or put on hold, and has been picked up by an editor and
56
- should be progressing through the document pipeline. """
57
-
58
- DOCUMENT_STATUS_NEW = "New"
59
- """ This document isn't published, on hold, or assigned, and can be picked up by an editor in the future. """
60
-
61
-
62
42
  DOCUMENT_COLLECTION_URI_JUDGMENT = "judgment"
63
43
  DOCUMENT_COLLECTION_URI_PRESS_SUMMARY = "press-summary"
64
44
 
@@ -67,19 +47,6 @@ if TYPE_CHECKING:
67
47
 
68
48
 
69
49
  DocumentURIString = NewType("DocumentURIString", str)
70
- CourtIdentifierString = NewType("CourtIdentifierString", str)
71
-
72
-
73
- class CannotPublishUnpublishableDocument(Exception):
74
- """A document which has failed publication safety checks in `Document.is_publishable` cannot be published."""
75
-
76
-
77
- class DocumentNotSafeForDeletion(Exception):
78
- """A document which is not safe for deletion cannot be deleted."""
79
-
80
-
81
- class NonXMLDocumentError(Exception):
82
- """A document cannot be parsed as XML."""
83
50
 
84
51
 
85
52
  class Document:
@@ -96,7 +63,7 @@ class Document:
96
63
 
97
64
  attributes_to_validate: list[tuple[str, bool, str]] = [
98
65
  (
99
- "failed_to_parse",
66
+ "is_failure",
100
67
  False,
101
68
  "This document failed to parse",
102
69
  ),
@@ -143,20 +110,18 @@ class Document:
143
110
 
144
111
  :raises DocumentNotFoundError: The document does not exist within MarkLogic
145
112
  """
146
- self.uri = DocumentURIString(uri.strip("/"))
147
- self.api_client = api_client
113
+ self.uri: DocumentURIString = DocumentURIString(uri.strip("/"))
114
+ self.api_client: MarklogicApiClient = api_client
148
115
  if not self.document_exists():
149
116
  raise DocumentNotFoundError(f"Document {self.uri} does not exist")
150
117
 
151
- self.xml = self.XML(
152
- xml_bytestring=self.api_client.get_judgment_xml_bytestring(
153
- self.uri,
154
- show_unpublished=True,
155
- ),
118
+ self.body: DocumentBody = DocumentBody(
119
+ xml_bytestring=self.api_client.get_judgment_xml_bytestring(self.uri, show_unpublished=True),
156
120
  )
121
+ """ `Document.body` represents the XML of the document itself, without any information such as version tracking or properties. """
157
122
 
158
123
  def __repr__(self) -> str:
159
- name = self.name or "un-named"
124
+ name = self.body.name or "un-named"
160
125
  return f"<{self.document_noun} {self.uri}: {name}>"
161
126
 
162
127
  def document_exists(self) -> bool:
@@ -186,104 +151,6 @@ class Document:
186
151
  """
187
152
  return f"https://caselaw.nationalarchives.gov.uk/{self.uri}"
188
153
 
189
- @cached_property
190
- def name(self) -> str:
191
- return self.xml.get_xpath_match_string(
192
- "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value",
193
- {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
194
- )
195
-
196
- @cached_property
197
- def court(self) -> str:
198
- return self.xml.get_xpath_match_string(
199
- "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()",
200
- {
201
- "uk": "https://caselaw.nationalarchives.gov.uk/akn",
202
- "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
203
- },
204
- )
205
-
206
- @cached_property
207
- def jurisdiction(self) -> str:
208
- return self.xml.get_xpath_match_string(
209
- "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()",
210
- {
211
- "uk": "https://caselaw.nationalarchives.gov.uk/akn",
212
- "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
213
- },
214
- )
215
-
216
- @property
217
- def court_and_jurisdiction_identifier_string(self) -> CourtIdentifierString:
218
- if self.jurisdiction != "":
219
- return CourtIdentifierString("/".join((self.court, self.jurisdiction)))
220
- return CourtIdentifierString(self.court)
221
-
222
- @cached_property
223
- def document_date_as_string(self) -> str:
224
- return self.xml.get_xpath_match_string(
225
- "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRdate/@date",
226
- {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
227
- )
228
-
229
- @cached_property
230
- def document_date_as_date(self) -> Optional[datetime.date]:
231
- if not self.document_date_as_string:
232
- return None
233
- try:
234
- return datetime.datetime.strptime(
235
- self.document_date_as_string,
236
- "%Y-%m-%d",
237
- ).date()
238
- except ValueError:
239
- warnings.warn(
240
- f"Unparsable date encountered: {self.document_date_as_string}",
241
- UnparsableDate,
242
- )
243
- return None
244
-
245
- def get_manifestation_datetimes(
246
- self,
247
- name: Optional[str] = None,
248
- ) -> list[datetime.datetime]:
249
- name_filter = f"[@name='{name}']" if name else ""
250
- iso_datetimes = self.xml.get_xpath_match_strings(
251
- "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRManifestation"
252
- f"/akn:FRBRdate{name_filter}/@date",
253
- {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
254
- )
255
-
256
- return [parse_string_date_as_utc(event, pytz.UTC) for event in iso_datetimes]
257
-
258
- def get_latest_manifestation_datetime(
259
- self,
260
- name: Optional[str] = None,
261
- ) -> Optional[datetime.datetime]:
262
- events = self.get_manifestation_datetimes(name)
263
- if not events:
264
- return None
265
- return max(events)
266
-
267
- def get_latest_manifestation_type(self) -> Optional[str]:
268
- return max(
269
- (
270
- (type, time)
271
- for type in ["transform", "tna-enriched"]
272
- if (time := self.get_latest_manifestation_datetime(type))
273
- ),
274
- key=lambda x: x[1],
275
- )[0]
276
-
277
- @cached_property
278
- def transformation_datetime(self) -> Optional[datetime.datetime]:
279
- """When was this document successfully parsed or reparsed (date from XML)"""
280
- return self.get_latest_manifestation_datetime("transform")
281
-
282
- @cached_property
283
- def enrichment_datetime(self) -> Optional[datetime.datetime]:
284
- """When was this document successfully enriched (date from XML)"""
285
- return self.get_latest_manifestation_datetime("tna-enriched")
286
-
287
154
  @cached_property
288
155
  def is_published(self) -> bool:
289
156
  return self.api_client.get_published(self.uri)
@@ -372,10 +239,6 @@ class Document:
372
239
  "Is this document a potentially historic version of a document, or is it the main document itself?"
373
240
  return extract_version(self.uri) != 0
374
241
 
375
- @cached_property
376
- def content_as_xml(self) -> str:
377
- return self.xml.xml_as_string
378
-
379
242
  def content_as_html(
380
243
  self,
381
244
  version_uri: Optional[DocumentURIString] = None,
@@ -418,7 +281,7 @@ class Document:
418
281
 
419
282
  :return: `True` if this document is in a 'failure' state, otherwise `False`
420
283
  """
421
- if self.failed_to_parse:
284
+ if self.body.failed_to_parse:
422
285
  return True
423
286
  return False
424
287
 
@@ -428,20 +291,9 @@ class Document:
428
291
  return True
429
292
  return False
430
293
 
431
- @cached_property
432
- def failed_to_parse(self) -> bool:
433
- """
434
- Did this document entirely fail to parse?
435
-
436
- :return: `True` if there was a complete parser failure, otherwise `False`
437
- """
438
- if "error" in self.xml.root_element:
439
- return True
440
- return False
441
-
442
294
  @cached_property
443
295
  def has_name(self) -> bool:
444
- if not self.name:
296
+ if not self.body.name:
445
297
  return False
446
298
 
447
299
  return True
@@ -450,7 +302,7 @@ class Document:
450
302
  def has_valid_court(self) -> bool:
451
303
  try:
452
304
  return bool(
453
- courts.get_by_code(self.court_and_jurisdiction_identifier_string),
305
+ courts.get_by_code(self.body.court_and_jurisdiction_identifier_string),
454
306
  )
455
307
  except CourtNotFoundException:
456
308
  return False
@@ -531,7 +383,7 @@ class Document:
531
383
  Has this document been enriched recently?
532
384
  """
533
385
 
534
- last_enrichment = self.enrichment_datetime
386
+ last_enrichment = self.body.enrichment_datetime
535
387
  if not last_enrichment:
536
388
  return False
537
389
 
@@ -612,7 +464,11 @@ class Document:
612
464
  self.api_client.set_property(self.uri, "last_sent_to_parser", now.isoformat())
613
465
 
614
466
  parser_type_noun = {"judgment": "judgment", "press summary": "pressSummary"}[self.document_noun]
615
- checked_date = self.document_date_as_string if self.document_date_as_string > "1001" else None
467
+ checked_date: Optional[str] = (
468
+ self.body.document_date_as_date.isoformat()
469
+ if self.body.document_date_as_date and self.body.document_date_as_date > datetime.date(1001, 1, 1)
470
+ else None
471
+ )
616
472
 
617
473
  # the keys of parser_instructions should exactly match the parser output
618
474
  # in the *-metadata.json files by the parser. Whilst typically empty
@@ -621,9 +477,9 @@ class Document:
621
477
  parser_instructions: ParserInstructionsDict = {
622
478
  "documentType": parser_type_noun,
623
479
  "metadata": {
624
- "name": self.name or None,
480
+ "name": self.body.name or None,
625
481
  "cite": self.best_human_identifier or None,
626
- "court": self.court or None,
482
+ "court": self.body.court or None,
627
483
  "date": checked_date,
628
484
  "uri": self.uri,
629
485
  },
@@ -653,38 +509,3 @@ class Document:
653
509
  if self.docx_exists():
654
510
  return True
655
511
  return False
656
-
657
- class XML:
658
- """
659
- Represents the XML of a document, and should contain all methods for interacting with it.
660
- """
661
-
662
- def __init__(self, xml_bytestring: bytes):
663
- """
664
- :raises NonXMLDocumentError: This document is not valid XML
665
- """
666
- try:
667
- self.xml_as_tree: etree.Element = etree.fromstring(xml_bytestring)
668
- except etree.XMLSyntaxError:
669
- raise NonXMLDocumentError
670
-
671
- @property
672
- def xml_as_string(self) -> str:
673
- """
674
- :return: A string representation of this document's XML tree.
675
- """
676
- return str(etree.tostring(self.xml_as_tree).decode(encoding="utf-8"))
677
-
678
- @property
679
- def root_element(self) -> str:
680
- return str(self.xml_as_tree.tag)
681
-
682
- def get_xpath_match_string(self, xpath: str, namespaces: Dict[str, str]) -> str:
683
- return get_xpath_match_string(self.xml_as_tree, xpath, namespaces)
684
-
685
- def get_xpath_match_strings(
686
- self,
687
- xpath: str,
688
- namespaces: Dict[str, str],
689
- ) -> list[str]:
690
- return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
@@ -0,0 +1,142 @@
1
+ import datetime
2
+ import warnings
3
+ from functools import cached_property
4
+ from typing import NewType, Optional
5
+
6
+ import pytz
7
+
8
+ from caselawclient.models.utilities.dates import parse_string_date_as_utc
9
+
10
+ from .xml import XML
11
+
12
+ CourtIdentifierString = NewType("CourtIdentifierString", str)
13
+
14
+
15
+ class UnparsableDate(Warning):
16
+ pass
17
+
18
+
19
+ class DocumentBody:
20
+ """
21
+ A class for abstracting out interactions with the body of a document.
22
+ """
23
+
24
+ def __init__(self, xml_bytestring: bytes):
25
+ self._xml = XML(xml_bytestring=xml_bytestring)
26
+ """ This is an instance of the `XML` class (imported from `.xml`) for manipulation of the XML document itself. """
27
+
28
+ def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str]) -> str:
29
+ return self._xml.get_xpath_match_string(xpath, namespaces)
30
+
31
+ @cached_property
32
+ def name(self) -> str:
33
+ return self._xml.get_xpath_match_string(
34
+ "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value",
35
+ {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
36
+ )
37
+
38
+ @cached_property
39
+ def court(self) -> str:
40
+ return self._xml.get_xpath_match_string(
41
+ "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()",
42
+ {
43
+ "uk": "https://caselaw.nationalarchives.gov.uk/akn",
44
+ "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
45
+ },
46
+ )
47
+
48
+ @cached_property
49
+ def jurisdiction(self) -> str:
50
+ return self._xml.get_xpath_match_string(
51
+ "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()",
52
+ {
53
+ "uk": "https://caselaw.nationalarchives.gov.uk/akn",
54
+ "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
55
+ },
56
+ )
57
+
58
+ @property
59
+ def court_and_jurisdiction_identifier_string(self) -> CourtIdentifierString:
60
+ if self.jurisdiction != "":
61
+ return CourtIdentifierString("/".join((self.court, self.jurisdiction)))
62
+ return CourtIdentifierString(self.court)
63
+
64
+ @cached_property
65
+ def document_date_as_string(self) -> str:
66
+ return self._xml.get_xpath_match_string(
67
+ "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRdate/@date",
68
+ {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
69
+ )
70
+
71
+ @cached_property
72
+ def document_date_as_date(self) -> Optional[datetime.date]:
73
+ if not self.document_date_as_string:
74
+ return None
75
+ try:
76
+ return datetime.datetime.strptime(
77
+ self.document_date_as_string,
78
+ "%Y-%m-%d",
79
+ ).date()
80
+ except ValueError:
81
+ warnings.warn(
82
+ f"Unparsable date encountered: {self.document_date_as_string}",
83
+ UnparsableDate,
84
+ )
85
+ return None
86
+
87
+ def get_manifestation_datetimes(
88
+ self,
89
+ name: Optional[str] = None,
90
+ ) -> list[datetime.datetime]:
91
+ name_filter = f"[@name='{name}']" if name else ""
92
+ iso_datetimes = self._xml.get_xpath_match_strings(
93
+ "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRManifestation"
94
+ f"/akn:FRBRdate{name_filter}/@date",
95
+ {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
96
+ )
97
+
98
+ return [parse_string_date_as_utc(event, pytz.UTC) for event in iso_datetimes]
99
+
100
+ def get_latest_manifestation_datetime(
101
+ self,
102
+ name: Optional[str] = None,
103
+ ) -> Optional[datetime.datetime]:
104
+ events = self.get_manifestation_datetimes(name)
105
+ if not events:
106
+ return None
107
+ return max(events)
108
+
109
+ def get_latest_manifestation_type(self) -> Optional[str]:
110
+ return max(
111
+ (
112
+ (type, time)
113
+ for type in ["transform", "tna-enriched"]
114
+ if (time := self.get_latest_manifestation_datetime(type))
115
+ ),
116
+ key=lambda x: x[1],
117
+ )[0]
118
+
119
+ @cached_property
120
+ def transformation_datetime(self) -> Optional[datetime.datetime]:
121
+ """When was this document successfully parsed or reparsed (date from XML)"""
122
+ return self.get_latest_manifestation_datetime("transform")
123
+
124
+ @cached_property
125
+ def enrichment_datetime(self) -> Optional[datetime.datetime]:
126
+ """When was this document successfully enriched (date from XML)"""
127
+ return self.get_latest_manifestation_datetime("tna-enriched")
128
+
129
+ @cached_property
130
+ def content_as_xml(self) -> str:
131
+ return self._xml.xml_as_string
132
+
133
+ @cached_property
134
+ def failed_to_parse(self) -> bool:
135
+ """
136
+ Did this document entirely fail to parse?
137
+
138
+ :return: `True` if there was a complete parser failure, otherwise `False`
139
+ """
140
+ if "error" in self._xml.root_element:
141
+ return True
142
+ return False
@@ -0,0 +1,6 @@
1
+ class CannotPublishUnpublishableDocument(Exception):
2
+ """A document which has failed publication safety checks in `Document.is_publishable` cannot be published."""
3
+
4
+
5
+ class DocumentNotSafeForDeletion(Exception):
6
+ """A document which is not safe for deletion cannot be deleted."""
@@ -0,0 +1,12 @@
1
+ DOCUMENT_STATUS_HOLD = "On hold"
2
+ """ This document has been placed on hold to actively prevent publication. """
3
+
4
+ DOCUMENT_STATUS_PUBLISHED = "Published"
5
+ """ This document has been published and should be considered publicly visible. """
6
+
7
+ DOCUMENT_STATUS_IN_PROGRESS = "In progress"
8
+ """ This document has not been published or put on hold, and has been picked up by an editor and
9
+ should be progressing through the document pipeline. """
10
+
11
+ DOCUMENT_STATUS_NEW = "New"
12
+ """ This document isn't published, on hold, or assigned, and can be picked up by an editor in the future. """
@@ -0,0 +1,43 @@
1
+ from lxml import etree
2
+
3
+ from caselawclient.xml_helpers import get_xpath_match_string, get_xpath_match_strings
4
+
5
+
6
+ class NonXMLDocumentError(Exception):
7
+ """A document cannot be parsed as XML."""
8
+
9
+
10
+ class XML:
11
+ """
12
+ A class for interacting with the raw XML of a document.
13
+ """
14
+
15
+ def __init__(self, xml_bytestring: bytes):
16
+ """
17
+ :raises NonXMLDocumentError: This document is not valid XML
18
+ """
19
+ try:
20
+ self.xml_as_tree: etree.Element = etree.fromstring(xml_bytestring)
21
+ except etree.XMLSyntaxError:
22
+ raise NonXMLDocumentError
23
+
24
+ @property
25
+ def xml_as_string(self) -> str:
26
+ """
27
+ :return: A string representation of this document's XML tree.
28
+ """
29
+ return str(etree.tostring(self.xml_as_tree).decode(encoding="utf-8"))
30
+
31
+ @property
32
+ def root_element(self) -> str:
33
+ return str(self.xml_as_tree.tag)
34
+
35
+ def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str]) -> str:
36
+ return get_xpath_match_string(self.xml_as_tree, xpath, namespaces)
37
+
38
+ def get_xpath_match_strings(
39
+ self,
40
+ xpath: str,
41
+ namespaces: dict[str, str],
42
+ ) -> list[str]:
43
+ return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
@@ -8,7 +8,6 @@ from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
8
8
  if TYPE_CHECKING:
9
9
  from caselawclient.models.press_summaries import PressSummary
10
10
 
11
- from ..xml_helpers import get_xpath_match_string
12
11
  from .documents import Document
13
12
 
14
13
 
@@ -25,8 +24,7 @@ class Judgment(NeutralCitationMixin, Document):
25
24
 
26
25
  @cached_property
27
26
  def neutral_citation(self) -> str:
28
- return get_xpath_match_string(
29
- self.xml.xml_as_tree,
27
+ return self.body.get_xpath_match_string(
30
28
  "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:cite/text()",
31
29
  {
32
30
  "uk": "https://caselaw.nationalarchives.gov.uk/akn",
@@ -6,7 +6,6 @@ from typing import TYPE_CHECKING, Any, Optional
6
6
 
7
7
  from caselawclient.errors import DocumentNotFoundError
8
8
  from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
9
- from caselawclient.xml_helpers import get_xpath_match_string
10
9
 
11
10
  from .documents import Document
12
11
 
@@ -27,8 +26,7 @@ class PressSummary(NeutralCitationMixin, Document):
27
26
 
28
27
  @cached_property
29
28
  def neutral_citation(self) -> str:
30
- return get_xpath_match_string(
31
- self.xml.xml_as_tree,
29
+ return self.body.get_xpath_match_string(
32
30
  "/akn:akomaNtoso/akn:doc/akn:preface/akn:p/akn:neutralCitation/text()",
33
31
  {
34
32
  "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
@@ -249,6 +249,8 @@ def request_parse(
249
249
  },
250
250
  }
251
251
 
252
+ # breakpoint()
253
+
252
254
  client.publish(
253
255
  TopicArn=env("REPARSE_SNS_TOPIC"),
254
256
  Message=json.dumps(message_to_send),