ds-caselaw-marklogic-api-client 17.2.0__tar.gz → 18.0.0__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (59)
  1. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/PKG-INFO +2 -2
  2. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/pyproject.toml +4 -2
  3. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/Client.py +0 -14
  4. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/__init__.py +0 -20
  5. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/client_helpers/search_helpers.py +10 -4
  6. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/models/documents.py +105 -37
  7. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/models/judgments.py +1 -1
  8. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/models/press_summaries.py +1 -1
  9. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/models/utilities/__init__.py +0 -9
  10. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/models/utilities/aws.py +61 -7
  11. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/responses/search_response.py +4 -16
  12. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/responses/search_result.py +10 -22
  13. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/search_parameters.py +2 -0
  14. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/LICENSE.md +0 -0
  15. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/README.md +0 -0
  16. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/client_helpers/__init__.py +0 -0
  17. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/content_hash.py +0 -0
  18. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/errors.py +0 -0
  19. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/models/__init__.py +0 -0
  20. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/models/neutral_citation_mixin.py +0 -0
  21. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/models/utilities/move.py +0 -0
  22. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/py.typed +0 -0
  23. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/responses/__init__.py +0 -0
  24. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/responses/xsl/search_match.xsl +0 -0
  25. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xml_helpers.py +0 -0
  26. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xml_tools.py +0 -0
  27. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/break_judgment_checkout.xqy +0 -0
  28. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/checkin_judgment.xqy +0 -0
  29. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/checkout_judgment.xqy +0 -0
  30. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/copy_document.xqy +0 -0
  31. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/delete_judgment.xqy +0 -0
  32. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/document_collections.xqy +0 -0
  33. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/document_exists.xqy +0 -0
  34. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/get_combined_stats_table.xqy +0 -0
  35. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/get_judgment.xqy +0 -0
  36. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/get_judgment_checkout_status.xqy +0 -0
  37. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/get_judgment_version.xqy +0 -0
  38. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/get_last_modified.xqy +0 -0
  39. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/get_properties_for_search_results.xqy +0 -0
  40. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/get_property.xqy +0 -0
  41. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/get_version_annotation.xqy +0 -0
  42. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/get_version_created.xqy +0 -0
  43. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/insert_document.xqy +0 -0
  44. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/list_judgment_versions.xqy +0 -0
  45. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/set_boolean_property.xqy +0 -0
  46. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/set_metadata_citation.xqy +0 -0
  47. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/set_metadata_court.xqy +0 -0
  48. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/set_metadata_name.xqy +0 -0
  49. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/set_metadata_this_uri.xqy +0 -0
  50. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/set_metadata_work_expression_date.xqy +0 -0
  51. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/set_property.xqy +0 -0
  52. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/update_document.xqy +0 -0
  53. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/update_locked_judgment.xqy +0 -0
  54. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/user_has_privilege.xqy +0 -0
  55. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/user_has_role.xqy +0 -0
  56. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/validate_all_documents.xqy +0 -0
  57. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/xslt.xqy +0 -0
  58. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery/xslt_transform.xqy +0 -0
  59. {ds_caselaw_marklogic_api_client-17.2.0 → ds_caselaw_marklogic_api_client-18.0.0}/src/caselawclient/xquery_type_dicts.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ds-caselaw-marklogic-api-client
3
- Version: 17.2.0
3
+ Version: 18.0.0
4
4
  Summary: An API client for interacting with the underlying data in Find Caselaw.
5
5
  Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
6
6
  Keywords: national archives,caselaw
@@ -14,7 +14,7 @@ Requires-Dist: boto3 (>=1.26.112,<2.0.0)
14
14
  Requires-Dist: certifi (>=2022.12.7,<2024.0.0)
15
15
  Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
16
16
  Requires-Dist: django-environ (>=0.11.0,<0.12.0)
17
- Requires-Dist: ds-caselaw-utils (>=1.0.0,<2.0.0)
17
+ Requires-Dist: ds-caselaw-utils (>=1.3.3,<2.0.0)
18
18
  Requires-Dist: idna (>=3.4,<4.0)
19
19
  Requires-Dist: lxml (>=4.9.2,<5.0.0)
20
20
  Requires-Dist: memoization (>=0.4.0,<0.5.0)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ds-caselaw-marklogic-api-client"
3
- version = "17.2.0"
3
+ version = "18.0.0"
4
4
  description = "An API client for interacting with the underlying data in Find Caselaw."
5
5
  authors = ["The National Archives"]
6
6
  homepage = "https://github.com/nationalarchives/ds-caselaw-custom-api-client"
@@ -21,7 +21,7 @@ requests-toolbelt = ">=0.10.1,<1.1.0"
21
21
  urllib3 = "^1.26.15"
22
22
  memoization = "^0.4.0"
23
23
  lxml = "^4.9.2"
24
- ds-caselaw-utils = "^1.0.0"
24
+ ds-caselaw-utils = "^1.3.3"
25
25
  boto3 = "^1.26.112"
26
26
  typing-extensions = "^4.7.1"
27
27
  mypy-boto3-s3 = "^1.26.104"
@@ -47,3 +47,5 @@ build-backend = "poetry.core.masonry.api"
47
47
  markers = [
48
48
  "write: the test deliberately changes the Marklogic DB')",
49
49
  ]
50
+
51
+ filterwarnings = ["ignore::DeprecationWarning"]
@@ -951,17 +951,3 @@ class MarklogicApiClient:
951
951
  )
952
952
 
953
953
  return results
954
-
955
-
956
- api_client = MarklogicApiClient(
957
- host=env("MARKLOGIC_HOST", default=None),
958
- username=env("MARKLOGIC_USER", default=None),
959
- password=env("MARKLOGIC_PASSWORD", default=None),
960
- use_https=env("MARKLOGIC_USE_HTTPS", default=False),
961
- )
962
- """
963
- An instance of the API client which is automatically initialised on importing the library.
964
-
965
- .. deprecated:: 13.0.1
966
- You should instead initialise your own instance of `MarklogicApiClient`
967
- """
@@ -37,24 +37,4 @@ client = MarklogicApiClient(
37
37
 
38
38
  ```
39
39
 
40
- ## (Deprecated) Use in-library client instance
41
-
42
- This library will automatically initialise an instance of the client. This functionality is deprecated, and will be
43
- removed.
44
-
45
- The client expects the following environment variables to be set or defined in a `.env` file:
46
-
47
- ```bash
48
- MARKLOGIC_HOST
49
- MARKLOGIC_USER
50
- MARKLOGIC_PASSWORD
51
- MARKLOGIC_USE_HTTPS # Optional, defaults to False
52
- ```
53
-
54
- Then import `api_client` from `caselawclient.Client`:
55
-
56
- ```python
57
- from caselawclient.Client import api_client
58
- ```
59
-
60
40
  """
@@ -1,3 +1,5 @@
1
+ from lxml import etree
2
+
1
3
  from caselawclient.Client import MarklogicApiClient
2
4
  from caselawclient.responses.search_response import SearchResponse
3
5
  from caselawclient.search_parameters import SearchParameters
@@ -14,8 +16,11 @@ def search_judgments_and_parse_response(
14
16
 
15
17
  :return: The parsed search response as a SearchResponse object
16
18
  """
17
- return SearchResponse.from_response_string(
18
- api_client.search_judgments_and_decode_response(search_parameters)
19
+ return SearchResponse(
20
+ etree.fromstring(
21
+ api_client.search_judgments_and_decode_response(search_parameters)
22
+ ),
23
+ api_client,
19
24
  )
20
25
 
21
26
 
@@ -30,6 +35,7 @@ def search_and_parse_response(
30
35
 
31
36
  :return: The parsed search response as a SearchResponse object
32
37
  """
33
- return SearchResponse.from_response_string(
34
- api_client.search_and_decode_response(search_parameters)
38
+ return SearchResponse(
39
+ etree.fromstring(api_client.search_and_decode_response(search_parameters)),
40
+ api_client,
35
41
  )
@@ -18,13 +18,15 @@ from ..errors import (
18
18
  OnlySupportedOnVersion,
19
19
  )
20
20
  from ..xml_helpers import get_xpath_match_string, get_xpath_match_strings
21
- from .utilities import VersionsDict, get_judgment_root, render_versions
21
+ from .utilities import VersionsDict, render_versions
22
22
  from .utilities.aws import (
23
+ ParserInstructionsDict,
24
+ announce_document_event,
23
25
  delete_documents_from_private_bucket,
24
26
  generate_docx_url,
25
27
  generate_pdf_url,
26
- notify_changed,
27
28
  publish_documents,
29
+ request_parse,
28
30
  unpublish_documents,
29
31
  uri_for_s3,
30
32
  )
@@ -74,6 +76,12 @@ class DocumentNotSafeForDeletion(Exception):
74
76
  pass
75
77
 
76
78
 
79
+ class NonXMLDocumentError(Exception):
80
+ """A document cannot be parsed as XML."""
81
+
82
+ pass
83
+
84
+
77
85
  class Document:
78
86
  """
79
87
  A base class from which all other document types are extensions. This class includes the essential methods for
@@ -140,6 +148,12 @@ class Document:
140
148
  if not self.document_exists():
141
149
  raise DocumentNotFoundError(f"Document {self.uri} does not exist")
142
150
 
151
+ self.xml = self.XML(
152
+ xml_bytestring=self.api_client.get_judgment_xml_bytestring(
153
+ self.uri, show_unpublished=True
154
+ )
155
+ )
156
+
143
157
  def document_exists(self) -> bool:
144
158
  """Helper method to verify the existence of a document within MarkLogic.
145
159
 
@@ -165,14 +179,14 @@ class Document:
165
179
 
166
180
  @cached_property
167
181
  def name(self) -> str:
168
- return self._get_xpath_match_string(
182
+ return self.xml.get_xpath_match_string(
169
183
  "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value",
170
184
  {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
171
185
  )
172
186
 
173
187
  @cached_property
174
188
  def court(self) -> str:
175
- return self._get_xpath_match_string(
189
+ return self.xml.get_xpath_match_string(
176
190
  "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()",
177
191
  {
178
192
  "uk": "https://caselaw.nationalarchives.gov.uk/akn",
@@ -182,7 +196,7 @@ class Document:
182
196
 
183
197
  @cached_property
184
198
  def document_date_as_string(self) -> str:
185
- return self._get_xpath_match_string(
199
+ return self.xml.get_xpath_match_string(
186
200
  "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRdate/@date",
187
201
  {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
188
202
  )
@@ -206,7 +220,7 @@ class Document:
206
220
  self, name: Optional[str] = None
207
221
  ) -> list[datetime.datetime]:
208
222
  name_filter = f"[@name='{name}']" if name else ""
209
- iso_datetimes = self._get_xpath_match_strings(
223
+ iso_datetimes = self.xml.get_xpath_match_strings(
210
224
  "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRManifestation"
211
225
  f"/akn:FRBRdate{name_filter}/@date",
212
226
  {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
@@ -248,6 +262,14 @@ class Document:
248
262
  def is_held(self) -> bool:
249
263
  return self.api_client.get_property(self.uri, "editor-hold") == "true"
250
264
 
265
+ @cached_property
266
+ def is_locked(self) -> bool:
267
+ return self.checkout_message is not None
268
+
269
+ @cached_property
270
+ def checkout_message(self) -> Optional[str]:
271
+ return self.api_client.get_judgment_checkout_status_message(self.uri)
272
+
251
273
  @cached_property
252
274
  def source_name(self) -> str:
253
275
  return self.api_client.get_property(self.uri, "source-name")
@@ -322,17 +344,7 @@ class Document:
322
344
 
323
345
  @cached_property
324
346
  def content_as_xml(self) -> str:
325
- return self.api_client.get_judgment_xml(self.uri, show_unpublished=True)
326
-
327
- @cached_property
328
- def content_as_xml_bytestring(self) -> bytes:
329
- return self.api_client.get_judgment_xml_bytestring(
330
- self.uri, show_unpublished=True
331
- )
332
-
333
- @cached_property
334
- def content_as_xml_tree(self) -> Any:
335
- return etree.fromstring(self.content_as_xml_bytestring)
347
+ return self.xml.xml_as_string
336
348
 
337
349
  def content_as_html(
338
350
  self,
@@ -391,13 +403,10 @@ class Document:
391
403
 
392
404
  :return: `True` if there was a complete parser failure, otherwise `False`
393
405
  """
394
- if "error" in self._get_root():
406
+ if "error" in self.xml.root_element:
395
407
  return True
396
408
  return False
397
409
 
398
- def _get_root(self) -> str:
399
- return get_judgment_root(self.content_as_xml_bytestring)
400
-
401
410
  @cached_property
402
411
  def has_name(self) -> bool:
403
412
  if not self.name:
@@ -448,9 +457,12 @@ class Document:
448
457
  return DOCUMENT_STATUS_NEW
449
458
 
450
459
  def enrich(self) -> None:
451
- notify_changed(
460
+ """
461
+ Announces to the ANNOUNCE SNS that the document is waiting to be enriched.
462
+ """
463
+ announce_document_event(
452
464
  uri=self.uri,
453
- status="published",
465
+ status="enrich",
454
466
  enrich=True,
455
467
  )
456
468
 
@@ -464,20 +476,19 @@ class Document:
464
476
 
465
477
  publish_documents(uri_for_s3(self.uri))
466
478
  self.api_client.set_published(self.uri, True)
467
- notify_changed(
479
+ announce_document_event(
468
480
  uri=self.uri,
469
- status="published",
470
- enrich=True,
481
+ status="publish",
471
482
  )
483
+ self.enrich()
472
484
 
473
485
  def unpublish(self) -> None:
474
486
  self.api_client.break_checkout(self.uri)
475
487
  unpublish_documents(uri_for_s3(self.uri))
476
488
  self.api_client.set_published(self.uri, False)
477
- notify_changed(
489
+ announce_document_event(
478
490
  uri=self.uri,
479
- status="not published",
480
- enrich=False,
491
+ status="unpublish",
481
492
  )
482
493
 
483
494
  def hold(self) -> None:
@@ -507,16 +518,73 @@ class Document:
507
518
  else:
508
519
  raise DocumentNotSafeForDeletion()
509
520
 
510
- def _get_xpath_match_string(self, xpath: str, namespaces: Dict[str, str]) -> str:
511
- return get_xpath_match_string(self.content_as_xml_tree, xpath, namespaces)
512
-
513
- def _get_xpath_match_strings(
514
- self, xpath: str, namespaces: Dict[str, str]
515
- ) -> list[str]:
516
- return get_xpath_match_strings(self.content_as_xml_tree, xpath, namespaces)
517
-
518
521
  def overwrite(self, new_citation: str) -> None:
519
522
  self.api_client.overwrite_document(self.uri, new_citation)
520
523
 
521
524
  def move(self, new_citation: str) -> None:
522
525
  self.api_client.update_document_uri(self.uri, new_citation)
526
+
527
+ def reparse(self) -> None:
528
+ "Send an SNS notification that triggers reparsing, also sending all editor-modifiable metadata and URI"
529
+
530
+ parser_type_noun = {"judgment": "judgment", "press summary": "pressSummary"}[
531
+ self.document_noun
532
+ ]
533
+ checked_date = (
534
+ self.document_date_as_string
535
+ if self.document_date_as_string > "1001"
536
+ else None
537
+ )
538
+
539
+ # the keys of parser_instructions should exactly match the parser output
540
+ # in the *-metadata.json files by the parser. Whilst typically empty
541
+ # values are "" from the API, we should pass None instead in this case.
542
+
543
+ parser_instructions: ParserInstructionsDict = {
544
+ "name": self.name or None,
545
+ "cite": self.best_human_identifier or None,
546
+ "court": self.court or None,
547
+ "date": checked_date,
548
+ "uri": self.uri,
549
+ "documentType": parser_type_noun,
550
+ "published": self.is_published,
551
+ }
552
+
553
+ request_parse(
554
+ uri=self.uri,
555
+ reference=self.consignment_reference,
556
+ parser_instructions=parser_instructions,
557
+ )
558
+
559
+ class XML:
560
+ """
561
+ Represents the XML of a document, and should contain all methods for interacting with it.
562
+ """
563
+
564
+ def __init__(self, xml_bytestring: bytes):
565
+ """
566
+ :raises NonXMLDocumentError: This document is not valid XML
567
+ """
568
+ try:
569
+ self.xml_as_tree: etree.Element = etree.fromstring(xml_bytestring)
570
+ except etree.XMLSyntaxError:
571
+ raise NonXMLDocumentError
572
+
573
+ @property
574
+ def xml_as_string(self) -> str:
575
+ """
576
+ :return: A string representation of this document's XML tree.
577
+ """
578
+ return str(etree.tostring(self.xml_as_tree).decode(encoding="utf-8"))
579
+
580
+ @property
581
+ def root_element(self) -> str:
582
+ return str(self.xml_as_tree.tag)
583
+
584
+ def get_xpath_match_string(self, xpath: str, namespaces: Dict[str, str]) -> str:
585
+ return get_xpath_match_string(self.xml_as_tree, xpath, namespaces)
586
+
587
+ def get_xpath_match_strings(
588
+ self, xpath: str, namespaces: Dict[str, str]
589
+ ) -> list[str]:
590
+ return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
@@ -21,7 +21,7 @@ class Judgment(NeutralCitationMixin, Document):
21
21
  @cached_property
22
22
  def neutral_citation(self) -> str:
23
23
  return get_xpath_match_string(
24
- self.content_as_xml_tree,
24
+ self.xml.xml_as_tree,
25
25
  "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:cite/text()",
26
26
  {
27
27
  "uk": "https://caselaw.nationalarchives.gov.uk/akn",
@@ -21,7 +21,7 @@ class PressSummary(NeutralCitationMixin, Document):
21
21
  @cached_property
22
22
  def neutral_citation(self) -> str:
23
23
  return get_xpath_match_string(
24
- self.content_as_xml_tree,
24
+ self.xml.xml_as_tree,
25
25
  "/akn:akomaNtoso/akn:doc/akn:preface/akn:p/akn:neutralCitation/text()",
26
26
  {
27
27
  "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
@@ -1,5 +1,4 @@
1
1
  import re
2
- import xml.etree.ElementTree as ET
3
2
  from typing import TypedDict
4
3
 
5
4
  from requests_toolbelt.multipart.decoder import BodyPart
@@ -12,14 +11,6 @@ akn_namespace = {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"}
12
11
  uk_namespace = {"uk": "https://caselaw.nationalarchives.gov.uk/akn"}
13
12
 
14
13
 
15
- def get_judgment_root(judgment_xml: bytes) -> str:
16
- try:
17
- parsed_xml = ET.XML(judgment_xml)
18
- return parsed_xml.tag
19
- except ET.ParseError:
20
- return "error"
21
-
22
-
23
14
  class VersionsDict(TypedDict):
24
15
  uri: str
25
16
  version: int
@@ -1,6 +1,8 @@
1
+ import datetime
1
2
  import json
2
3
  import logging
3
- from typing import Any, Literal, Union, overload
4
+ import uuid
5
+ from typing import Any, Literal, Optional, TypedDict, Union, overload
4
6
 
5
7
  import boto3
6
8
  import botocore.client
@@ -9,10 +11,21 @@ from mypy_boto3_s3.client import S3Client
9
11
  from mypy_boto3_s3.type_defs import CopySourceTypeDef, ObjectIdentifierTypeDef
10
12
  from mypy_boto3_sns.client import SNSClient
11
13
  from mypy_boto3_sns.type_defs import MessageAttributeValueTypeDef
14
+ from typing_extensions import NotRequired
12
15
 
13
16
  env = environ.Env()
14
17
 
15
18
 
19
+ class ParserInstructionsDict(TypedDict):
20
+ name: NotRequired[Optional[str]]
21
+ cite: NotRequired[Optional[str]]
22
+ court: NotRequired[Optional[str]]
23
+ date: NotRequired[Optional[str]]
24
+ uri: NotRequired[Optional[str]]
25
+ documentType: NotRequired[Optional[str]]
26
+ published: NotRequired[bool]
27
+
28
+
16
29
  @overload
17
30
  def create_aws_client(service: Literal["s3"]) -> S3Client:
18
31
  ...
@@ -64,10 +77,14 @@ def generate_signed_asset_url(key: str) -> str:
64
77
  )
65
78
 
66
79
 
67
- def generate_docx_url(uri: str) -> str:
68
- key = f'{uri}/{uri.replace("/", "_")}.docx'
80
+ def generate_docx_key(uri: str) -> str:
81
+ """from a canonical caselaw URI (eat/2022/1) return the S3 key of the associated docx"""
82
+ return f'{uri}/{uri.replace("/", "_")}.docx'
69
83
 
70
- return generate_signed_asset_url(key)
84
+
85
+ def generate_docx_url(uri: str) -> str:
86
+ """from a canonical caselaw URI (eat/2022/1) return a signed S3 link for the front end"""
87
+ return generate_signed_asset_url(generate_docx_key(uri))
71
88
 
72
89
 
73
90
  def generate_pdf_url(uri: str) -> str:
@@ -110,7 +127,7 @@ def publish_documents(uri: str) -> None:
110
127
 
111
128
  if not key.endswith("parser.log") and not key.endswith(".tar.gz"):
112
129
  source: CopySourceTypeDef = {"Bucket": private_bucket, "Key": key}
113
- extra_args = {"ACL": "public-read"}
130
+ extra_args: dict[str, str] = {}
114
131
  try:
115
132
  client.copy(source, public_bucket, key, extra_args)
116
133
  except botocore.client.ClientError as e:
@@ -127,7 +144,7 @@ def delete_documents_from_private_bucket(uri: str) -> None:
127
144
  delete_from_bucket(uri, env("PRIVATE_ASSET_BUCKET"))
128
145
 
129
146
 
130
- def notify_changed(uri: str, status: str, enrich: bool = False) -> None:
147
+ def announce_document_event(uri: str, status: str, enrich: bool = False) -> None:
131
148
  client = create_sns_client()
132
149
 
133
150
  message_attributes: dict[str, MessageAttributeValueTypeDef] = {}
@@ -146,7 +163,7 @@ def notify_changed(uri: str, status: str, enrich: bool = False) -> None:
146
163
  }
147
164
 
148
165
  client.publish(
149
- TopicArn=env("SNS_TOPIC"),
166
+ TopicArn=env("SNS_TOPIC"), # this is the ANNOUNCE SNS topic
150
167
  Message=json.dumps({"uri_reference": uri, "status": status}),
151
168
  Subject=f"Updated: {uri} {status}",
152
169
  MessageAttributes=message_attributes,
@@ -189,3 +206,40 @@ def build_new_key(old_key: str, new_uri: str) -> str:
189
206
  return f"{new_uri}/{new_filename}.{old_filename.split('.')[-1]}"
190
207
  else:
191
208
  return f"{new_uri}/{old_filename}"
209
+
210
+
211
+ def request_parse(
212
+ uri: str,
213
+ reference: Optional[str],
214
+ parser_instructions: Optional[ParserInstructionsDict] = None,
215
+ ) -> None:
216
+ client = create_sns_client()
217
+
218
+ if parser_instructions is None:
219
+ parser_instructions = ParserInstructionsDict({})
220
+
221
+ message_to_send = {
222
+ "properties": {
223
+ "messageType": "uk.gov.nationalarchives.da.messages.request.courtdocument.parse.RequestCourtDocumentParse",
224
+ "timestamp": datetime.datetime.now(datetime.timezone.utc)
225
+ .isoformat()
226
+ .replace("+00:00", "Z"),
227
+ "function": "fcl-judgment-parse-request",
228
+ "producer": "FCL",
229
+ "executionId": f"fcl_ex_id_{uuid.uuid4()}",
230
+ "parentExecutionId": None,
231
+ },
232
+ "parameters": {
233
+ "s3Bucket": env("PRIVATE_ASSET_BUCKET"),
234
+ "s3Key": generate_docx_key(uri),
235
+ "reference": reference or f"FCL-{uuid.uuid4()}",
236
+ "originator": "FCL",
237
+ "parserInstructions": parser_instructions,
238
+ },
239
+ }
240
+
241
+ client.publish(
242
+ TopicArn=env("REPARSE_SNS_TOPIC"),
243
+ Message=json.dumps(message_to_send),
244
+ Subject=f"Reparse request: {uri}",
245
+ )
@@ -2,6 +2,7 @@ from typing import List
2
2
 
3
3
  from lxml import etree
4
4
 
5
+ from caselawclient.Client import MarklogicApiClient
5
6
  from caselawclient.responses.search_result import SearchResult
6
7
 
7
8
 
@@ -13,22 +14,14 @@ class SearchResponse:
13
14
  NAMESPACES = {"search": "http://marklogic.com/appservices/search"}
14
15
  """ Namespaces used in XPath expressions."""
15
16
 
16
- def __init__(self, node: etree._Element) -> None:
17
+ def __init__(self, node: etree._Element, client: MarklogicApiClient) -> None:
17
18
  """
18
19
  Initializes a SearchResponse instance from an xml node.
19
20
 
20
21
  :param node: The XML data as an etree element
21
22
  """
22
23
  self.node = node
23
-
24
- @staticmethod
25
- def from_response_string(xml: str) -> "SearchResponse":
26
- """
27
- Constructs a SearchResponse instance from an xml response string.
28
-
29
- :param xml: The XML data as a string
30
- """
31
- return SearchResponse(etree.fromstring(xml))
24
+ self.client = client
32
25
 
33
26
  @property
34
27
  def total(self) -> str:
@@ -51,9 +44,4 @@ class SearchResponse:
51
44
  results = self.node.xpath(
52
45
  "//search:response/search:result", namespaces=self.NAMESPACES
53
46
  )
54
- return [
55
- SearchResult(
56
- result,
57
- )
58
- for result in results
59
- ]
47
+ return [SearchResult(result, self.client) for result in results]
@@ -2,6 +2,7 @@ import logging
2
2
  import os
3
3
  from datetime import datetime
4
4
  from enum import Enum
5
+ from functools import cached_property
5
6
  from typing import Dict, Optional
6
7
 
7
8
  from dateutil import parser as dateparser
@@ -9,7 +10,7 @@ from dateutil.parser import ParserError
9
10
  from ds_caselaw_utils.courts import Court, CourtNotFoundException, courts
10
11
  from lxml import etree
11
12
 
12
- from caselawclient.Client import api_client
13
+ from caselawclient.Client import MarklogicApiClient
13
14
  from caselawclient.models.documents import DocumentURIString
14
15
  from caselawclient.xml_helpers import get_xpath_match_string
15
16
 
@@ -44,20 +45,6 @@ class SearchResultMetadata:
44
45
  self.node = node
45
46
  self.last_modified = last_modified
46
47
 
47
- @staticmethod
48
- def create_from_uri(uri: DocumentURIString) -> "SearchResultMetadata":
49
- """
50
- Create a SearchResultMetadata instance from a search result URI.
51
-
52
- :param uri: The URI of the search result
53
-
54
- :return: The created SearchResultMetadata instance
55
- """
56
- response_text = api_client.get_properties_for_search_results([uri])
57
- last_modified = api_client.get_last_modified(uri)
58
- root = etree.fromstring(response_text)
59
- return SearchResultMetadata(root, last_modified)
60
-
61
48
  @property
62
49
  def author(self) -> str:
63
50
  """
@@ -162,12 +149,13 @@ class SearchResult:
162
149
  }
163
150
  """ Namespace mappings used in XPath expressions. """
164
151
 
165
- def __init__(self, node: etree._Element):
152
+ def __init__(self, node: etree._Element, client: MarklogicApiClient):
166
153
  """
167
154
  :param node: The XML element representing the search result
168
155
  """
169
156
 
170
157
  self.node = node
158
+ self.client = client
171
159
 
172
160
  @property
173
161
  def uri(self) -> DocumentURIString:
@@ -259,15 +247,15 @@ class SearchResult:
259
247
  xslt_transform = etree.XSLT(etree.parse(file_path))
260
248
  return str(xslt_transform(self.node))
261
249
 
262
- @property
250
+ @cached_property
263
251
  def metadata(self) -> SearchResultMetadata:
264
252
  """
265
- :return: The metadata of the search result
253
+ :return: A `SearchResultMetadata` instance representing the metadata of this result
266
254
  """
267
-
268
- return SearchResultMetadata.create_from_uri(
269
- self.uri,
270
- )
255
+ response_text = self.client.get_properties_for_search_results([self.uri])
256
+ last_modified = self.client.get_last_modified(self.uri)
257
+ root = etree.fromstring(response_text)
258
+ return SearchResultMetadata(root, last_modified)
271
259
 
272
260
  def _get_xpath_match_string(self, path: str) -> str:
273
261
  return get_xpath_match_string(self.node, path, namespaces=self.NAMESPACES)
@@ -73,6 +73,8 @@ class SearchParameters:
73
73
  "ewhc/kb": "ewhc/qb",
74
74
  "ewhc/scco": "ewhc/costs",
75
75
  "ewhc/costs": "ewhc/scco",
76
+ "ukait": "ukut/iac",
77
+ "ukut/iac": "ukait",
76
78
  }
77
79
  alternative_court_names = set()
78
80
  for primary_name, secondary_name in ALTERNATIVE_COURT_NAMES_MAP.items():