ds-caselaw-marklogic-api-client 40.0.0__py3-none-any.whl → 44.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. caselawclient/Client.py +36 -5
  2. caselawclient/client_helpers/__init__.py +0 -114
  3. caselawclient/factories.py +3 -1
  4. caselawclient/managers/__init__.py +0 -0
  5. caselawclient/managers/merge/__init__.py +51 -0
  6. caselawclient/managers/merge/checks.py +79 -0
  7. caselawclient/models/documents/__init__.py +60 -3
  8. caselawclient/models/documents/body.py +53 -9
  9. caselawclient/models/documents/versions.py +114 -0
  10. caselawclient/models/documents/xml.py +4 -1
  11. caselawclient/models/identifiers/__init__.py +4 -1
  12. caselawclient/models/identifiers/collection.py +2 -0
  13. caselawclient/models/utilities/aws.py +35 -1
  14. caselawclient/search_parameters.py +4 -0
  15. caselawclient/types.py +43 -6
  16. caselawclient/xml_helpers.py +18 -2
  17. caselawclient/xquery/check_content_hash_unique_by_uri.xqy +15 -0
  18. caselawclient/xquery/get_judgment.xqy +2 -12
  19. caselawclient/xquery/get_pending_parse_for_version_count.xqy +26 -0
  20. caselawclient/xquery/{get_pending_parse_for_version.xqy → get_pending_parse_for_version_documents.xqy} +2 -2
  21. caselawclient/xquery/xslt_transform.xqy +4 -28
  22. caselawclient/xquery_type_dicts.py +13 -2
  23. {ds_caselaw_marklogic_api_client-40.0.0.dist-info → ds_caselaw_marklogic_api_client-44.0.3.dist-info}/METADATA +3 -2
  24. {ds_caselaw_marklogic_api_client-40.0.0.dist-info → ds_caselaw_marklogic_api_client-44.0.3.dist-info}/RECORD +26 -20
  25. {ds_caselaw_marklogic_api_client-40.0.0.dist-info → ds_caselaw_marklogic_api_client-44.0.3.dist-info}/LICENSE.md +0 -0
  26. {ds_caselaw_marklogic_api_client-40.0.0.dist-info → ds_caselaw_marklogic_api_client-44.0.3.dist-info}/WHEEL +0 -0
caselawclient/Client.py CHANGED
@@ -21,19 +21,20 @@ from requests.structures import CaseInsensitiveDict
21
21
  from requests_toolbelt.multipart import decoder
22
22
 
23
23
  from caselawclient import xquery_type_dicts as query_dicts
24
- from caselawclient.client_helpers import VersionAnnotation
25
24
  from caselawclient.identifier_resolution import IdentifierResolutions
26
25
  from caselawclient.models.documents import (
27
26
  DOCUMENT_COLLECTION_URI_JUDGMENT,
28
27
  DOCUMENT_COLLECTION_URI_PRESS_SUMMARY,
29
28
  Document,
30
29
  )
30
+ from caselawclient.models.documents.versions import VersionAnnotation
31
31
  from caselawclient.models.judgments import Judgment
32
32
  from caselawclient.models.press_summaries import PressSummary
33
33
  from caselawclient.models.utilities import move
34
34
  from caselawclient.search_parameters import SearchParameters
35
35
  from caselawclient.types import DocumentIdentifierSlug, DocumentIdentifierValue, DocumentURIString
36
36
  from caselawclient.xquery_type_dicts import (
37
+ CheckContentHashUniqueByUriDict,
37
38
  MarkLogicDocumentURIString,
38
39
  MarkLogicDocumentVersionURIString,
39
40
  MarkLogicPrivilegeURIString,
@@ -728,6 +729,14 @@ class MarklogicApiClient:
728
729
  == 0
729
730
  )
730
731
 
732
+ def has_unique_content_hash(self, judgment_uri: DocumentURIString) -> bool:
733
+ """
734
+ Returns True if the content hash for this document is unique (not shared with other documents).
735
+ """
736
+ uri = self._format_uri_for_marklogic(judgment_uri)
737
+ vars: CheckContentHashUniqueByUriDict = {"uri": uri}
738
+ return self._eval_and_decode(vars, "check_content_hash_unique_by_uri.xqy") == "true"
739
+
731
740
  def eval(
732
741
  self,
733
742
  xquery_path: str,
@@ -793,6 +802,8 @@ class MarklogicApiClient:
793
802
  :param judge:
794
803
  :param party:
795
804
  :param neutral_citation:
805
+ :param document_name:
806
+ :param consignment_number:
796
807
  :param specific_keyword:
797
808
  :param order:
798
809
  :param date_from:
@@ -1211,13 +1222,13 @@ class MarklogicApiClient:
1211
1222
 
1212
1223
  return (int(table[1][1]), int(table[1][2]))
1213
1224
 
1214
- def get_pending_parse_for_version(
1225
+ def get_documents_pending_parse_for_version(
1215
1226
  self,
1216
1227
  target_version: tuple[int, int],
1217
1228
  maximum_records: int = 1000,
1218
1229
  ) -> list[list[Any]]:
1219
- """Retrieve documents which are not yet parsed with a given version."""
1220
- vars: query_dicts.GetPendingParseForVersionDict = {
1230
+ """Retrieve a list of documents which are not yet parsed with a given version."""
1231
+ vars: query_dicts.GetPendingParseForVersionDocumentsDict = {
1221
1232
  "target_major_version": target_version[0],
1222
1233
  "target_minor_version": target_version[1],
1223
1234
  "maximum_records": maximum_records,
@@ -1226,13 +1237,33 @@ class MarklogicApiClient:
1226
1237
  get_single_string_from_marklogic_response(
1227
1238
  self._send_to_eval(
1228
1239
  vars,
1229
- "get_pending_parse_for_version.xqy",
1240
+ "get_pending_parse_for_version_documents.xqy",
1230
1241
  ),
1231
1242
  ),
1232
1243
  )
1233
1244
 
1234
1245
  return results
1235
1246
 
1247
+ def get_count_pending_parse_for_version(
1248
+ self,
1249
+ target_version: tuple[int, int],
1250
+ ) -> int:
1251
+ """Get the total number of documents which are not yet parsed with a given version."""
1252
+ vars: query_dicts.GetPendingParseForVersionCountDict = {
1253
+ "target_major_version": target_version[0],
1254
+ "target_minor_version": target_version[1],
1255
+ }
1256
+ results = json.loads(
1257
+ get_single_string_from_marklogic_response(
1258
+ self._send_to_eval(
1259
+ vars,
1260
+ "get_pending_parse_for_version_count.xqy",
1261
+ ),
1262
+ ),
1263
+ )
1264
+
1265
+ return int(results[1][0])
1266
+
1236
1267
  def get_recently_parsed(
1237
1268
  self,
1238
1269
  ) -> list[list[Any]]:
caselawclient/client_helpers/__init__.py CHANGED
@@ -1,9 +1,4 @@
1
- import json
2
- from enum import Enum
3
- from typing import Any, Optional, TypedDict
4
-
5
1
  from lxml import etree
6
- from typing_extensions import NotRequired
7
2
 
8
3
  from caselawclient.xml_helpers import DEFAULT_NAMESPACES
9
4
 
@@ -17,115 +12,6 @@ class CannotDetermineDocumentType(Exception):
17
12
  pass
18
13
 
19
14
 
20
- class AnnotationDataDict(TypedDict):
21
- type: str
22
- calling_function: str
23
- calling_agent: str
24
- message: NotRequired[str]
25
- payload: NotRequired[dict[str, Any]]
26
- automated: bool
27
-
28
-
29
- class VersionType(Enum):
30
- """Valid types of version."""
31
-
32
- SUBMISSION = "submission"
33
- """ This version has been created as a result of a submission of a new document. """
34
-
35
- ENRICHMENT = "enrichment"
36
- """ This version has been created through an enrichment process. """
37
-
38
- EDIT = "edit"
39
- """ This version has been created as the result of a manual edit. """
40
-
41
-
42
- class VersionAnnotation:
43
- """A class holding structured data about the reason for a version."""
44
-
45
- def __init__(
46
- self,
47
- version_type: VersionType,
48
- automated: bool,
49
- message: Optional[str] = None,
50
- payload: Optional[dict[str, Any]] = None,
51
- ):
52
- """
53
- :param version_type: The type of version being created
54
- :param automated: `True` if this action has happened as the result of an automated process, rather than a human
55
- action
56
- :param message: A human-readable string containing information about the version which can't be expressed in the
57
- structured data.
58
- :param payload: A dict containing additional information relevant to this version change
59
- """
60
- self.version_type = version_type
61
- self.automated = automated
62
- self.message = message
63
- self.payload = payload
64
-
65
- self.calling_function: Optional[str] = None
66
- self.calling_agent: Optional[str] = None
67
-
68
- def set_calling_function(self, calling_function: str) -> None:
69
- """
70
- Set the name of the calling function for tracing purposes
71
-
72
- :param calling_function: The name of the function which is performing the database write
73
- """
74
- self.calling_function = calling_function
75
-
76
- def set_calling_agent(self, calling_agent: str) -> None:
77
- """
78
- Set the name of the calling agent for tracing purposes
79
-
80
- :param calling_agent: The name of the agent which is performing the database write
81
- """
82
- self.calling_agent = calling_agent
83
-
84
- @property
85
- def structured_annotation_dict(self) -> AnnotationDataDict:
86
- """
87
- :return: A structured dict representing this `VersionAnnotation`
88
-
89
- :raises AttributeError: The name of the calling function has not been set; use `set_calling_function()`
90
- :raises AttributeError: The name of the calling agent has not been set; use `set_calling_agent()`
91
- """
92
- if not self.calling_function:
93
- raise AttributeError(
94
- "The name of the calling function has not been set; use set_calling_function()",
95
- )
96
-
97
- if not self.calling_agent:
98
- raise AttributeError(
99
- "The name of the calling agent has not been set; use set_calling_agent()",
100
- )
101
-
102
- annotation_data: AnnotationDataDict = {
103
- "type": self.version_type.value,
104
- "calling_function": self.calling_function,
105
- "calling_agent": self.calling_agent,
106
- "automated": self.automated,
107
- }
108
-
109
- if self.message:
110
- annotation_data["message"] = self.message
111
-
112
- if self.payload:
113
- annotation_data["payload"] = self.payload
114
-
115
- return annotation_data
116
-
117
- @property
118
- def as_json(self) -> str:
119
- """Render the structured annotation data as JSON, so it can be stored in the MarkLogic dls:annotation field.
120
-
121
- :return: A JSON string representing this `VersionAnnotation`"""
122
-
123
- return json.dumps(self.structured_annotation_dict)
124
-
125
- def __str__(self) -> str:
126
- return self.as_json
127
-
128
-
129
15
  def get_document_type_class(xml: bytes) -> type[Document]:
130
16
  """Attempt to get the type of the document based on the top-level structure of the XML document."""
131
17
 
caselawclient/factories.py CHANGED
@@ -20,7 +20,7 @@ T = TypeVar("T")
20
20
 
21
21
  DEFAULT_DOCUMENT_BODY_XML = """<akomaNtoso xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0" xmlns:uk="https://caselaw.nationalarchives.gov.uk/akn">
22
22
  <judgment name="decision">
23
- <meta/><header/>
23
+ <meta/><header><p>Header contains text</p></header>
24
24
  <judgmentBody>
25
25
  <decision>
26
26
  <p>This is a document.</p>
@@ -61,6 +61,8 @@ class DocumentFactory:
61
61
  "source_name": "Example Uploader",
62
62
  "source_email": "uploader@example.com",
63
63
  "consignment_reference": "TDR-12345",
64
+ "first_published_datetime": None,
65
+ "has_ever_been_published": False,
64
66
  "assigned_to": "",
65
67
  "versions": [],
66
68
  }
caselawclient/managers/__init__.py
File without changes
caselawclient/managers/merge/__init__.py ADDED
@@ -0,0 +1,51 @@
1
+ import caselawclient.managers.merge.checks as checks
2
+ from caselawclient.models.documents import Document
3
+ from caselawclient.types import SuccessFailureMessageTuple
4
+
5
+
6
+ def _combine_list_of_successfailure_results(
7
+ validations: list[SuccessFailureMessageTuple],
8
+ ) -> SuccessFailureMessageTuple:
9
+ """Given a list of SuccessFailureMessageTuples, combine the success/failure states and any messages into a single new object representing the overall success/failure state."""
10
+ success = True
11
+ messages: list[str] = []
12
+
13
+ for validation in validations:
14
+ if validation.success is False:
15
+ success = False
16
+
17
+ messages += validation.messages
18
+
19
+ return SuccessFailureMessageTuple(success, messages)
20
+
21
+
22
+ class MergeManager:
23
+ @classmethod
24
+ def check_document_is_safe_as_merge_source(cls, source_document: Document) -> SuccessFailureMessageTuple:
25
+ """
26
+ Is the given document safe to be considered as a merge source?
27
+ """
28
+
29
+ return _combine_list_of_successfailure_results(
30
+ [
31
+ checks.check_document_is_not_version(source_document),
32
+ checks.check_document_has_only_one_version(source_document),
33
+ checks.check_document_has_never_been_published(source_document),
34
+ checks.check_document_is_safe_to_delete(source_document),
35
+ ]
36
+ )
37
+
38
+ @classmethod
39
+ def check_source_document_is_safe_to_merge_into_target(
40
+ cls, source_document: Document, target_document: Document
41
+ ) -> SuccessFailureMessageTuple:
42
+ """Is the given source document safe to merge into a given target?"""
43
+
44
+ return _combine_list_of_successfailure_results(
45
+ [
46
+ checks.check_documents_are_not_same_document(source_document, target_document),
47
+ checks.check_document_is_not_version(target_document),
48
+ checks.check_documents_are_same_type(source_document, target_document),
49
+ checks.check_source_document_is_newer_than_target(source_document, target_document),
50
+ ]
51
+ )
caselawclient/managers/merge/checks.py ADDED
@@ -0,0 +1,79 @@
1
+ from caselawclient.models.documents import Document
2
+ from caselawclient.types import SuccessFailureMessageTuple
3
+
4
+
5
+ def check_document_is_not_version(document: Document) -> SuccessFailureMessageTuple:
6
+ """Check that the document URI isn't a specific version"""
7
+ if document.is_version:
8
+ return SuccessFailureMessageTuple(
9
+ False,
10
+ ["This document is a specific version, and cannot be used as a merge source"],
11
+ )
12
+
13
+ return SuccessFailureMessageTuple(True, [])
14
+
15
+
16
+ def check_document_has_only_one_version(document: Document) -> SuccessFailureMessageTuple:
17
+ """Make sure the document has exactly one version."""
18
+ if len(document.versions) > 1:
19
+ return SuccessFailureMessageTuple(
20
+ False,
21
+ ["This document has more than one version"],
22
+ )
23
+
24
+ return SuccessFailureMessageTuple(True, [])
25
+
26
+
27
+ def check_document_has_never_been_published(document: Document) -> SuccessFailureMessageTuple:
28
+ """Make sure the document has never been published."""
29
+ if document.has_ever_been_published:
30
+ return SuccessFailureMessageTuple(
31
+ False,
32
+ ["This document has previously been published"],
33
+ )
34
+
35
+ return SuccessFailureMessageTuple(True, [])
36
+
37
+
38
+ def check_document_is_safe_to_delete(document: Document) -> SuccessFailureMessageTuple:
39
+ """Make sure the document is safe to delete."""
40
+ if not document.safe_to_delete:
41
+ return SuccessFailureMessageTuple(
42
+ False,
43
+ ["This document cannot be deleted because it is published"],
44
+ )
45
+
46
+ return SuccessFailureMessageTuple(True, [])
47
+
48
+
49
+ def check_documents_are_not_same_document(document_one: Document, document_two: Document) -> SuccessFailureMessageTuple:
50
+ """Check that two documents aren't actually the same"""
51
+ if document_one.uri == document_two.uri:
52
+ return SuccessFailureMessageTuple(
53
+ False,
54
+ ["You cannot merge a document with itself"],
55
+ )
56
+ return SuccessFailureMessageTuple(True, [])
57
+
58
+
59
+ def check_documents_are_same_type(document_one: Document, document_two: Document) -> SuccessFailureMessageTuple:
60
+ """Check to see if this document is the same type as a target document."""
61
+ if type(document_one) is not type(document_two):
62
+ return SuccessFailureMessageTuple(
63
+ False,
64
+ [
65
+ f"The type of {document_one.uri} ({type(document_one).document_noun}) does not match the type of {document_two.uri} ({type(document_two).document_noun})"
66
+ ],
67
+ )
68
+ return SuccessFailureMessageTuple(True, [])
69
+
70
+
71
+ def check_source_document_is_newer_than_target(
72
+ source_document: Document, target_document: Document
73
+ ) -> SuccessFailureMessageTuple:
74
+ """Check to see if the created datetime of the latest version of this document is newer than the created datetime of the latest version of a target document."""
75
+ if source_document.version_created_datetime < target_document.version_created_datetime:
76
+ return SuccessFailureMessageTuple(
77
+ False, [f"The document at {source_document.uri} is older than the latest version of {target_document.uri}"]
78
+ )
79
+ return SuccessFailureMessageTuple(True, [])
caselawclient/models/documents/__init__.py CHANGED
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any, Optional
7
7
  from ds_caselaw_utils import courts
8
8
  from ds_caselaw_utils.courts import CourtNotFoundException
9
9
  from ds_caselaw_utils.types import NeutralCitationString
10
+ from pydantic import TypeAdapter
10
11
  from requests_toolbelt.multipart import decoder
11
12
 
12
13
  import caselawclient.models.documents.comparison as comparison
@@ -16,6 +17,7 @@ from caselawclient.errors import (
16
17
  OnlySupportedOnVersion,
17
18
  )
18
19
  from caselawclient.identifier_resolution import IdentifierResolutions
20
+ from caselawclient.models.documents.versions import AnnotationDataDict
19
21
  from caselawclient.models.identifiers import Identifier
20
22
  from caselawclient.models.identifiers.exceptions import IdentifierValidationException
21
23
  from caselawclient.models.identifiers.fclid import FindCaseLawIdentifier, FindCaseLawIdentifierSchema
@@ -24,6 +26,7 @@ from caselawclient.models.utilities import VersionsDict, extract_version, render
24
26
  from caselawclient.models.utilities.aws import (
25
27
  ParserInstructionsDict,
26
28
  announce_document_event,
29
+ are_unpublished_assets_clean,
27
30
  check_docx_exists,
28
31
  delete_documents_from_private_bucket,
29
32
  generate_docx_url,
@@ -95,6 +98,16 @@ class Document:
95
98
  True,
96
99
  "The court for this {document_noun} is not valid",
97
100
  ),
101
+ (
102
+ "has_unique_content_hash",
103
+ True,
104
+ "There is another document with identical content",
105
+ ),
106
+ (
107
+ "has_only_clean_assets",
108
+ True,
109
+ "An uncleaned asset exists for this document",
110
+ ),
98
111
  ]
99
112
  """
100
113
  A list of tuples in the form:
@@ -245,7 +258,7 @@ class Document:
245
258
  """
246
259
  if self.is_version:
247
260
  raise NotSupportedOnVersion(
248
- "Cannot get versions of a version for {self.uri}",
261
+ f"Cannot get versions of a version for {self.uri}",
249
262
  )
250
263
  docs = []
251
264
  for version in self.versions:
@@ -307,10 +320,37 @@ class Document:
307
320
 
308
321
  @cached_property
309
322
  def first_published_datetime(self) -> Optional[datetime.datetime]:
323
+ """
324
+ Return the database value for the date and time this document was first published.
325
+
326
+ :return: The datetime value in the database for "first published".
327
+ """
310
328
  return self.api_client.get_datetime_property(self.uri, "first_published_datetime")
311
329
 
330
+ @cached_property
331
+ def first_published_datetime_display(self) -> Optional[datetime.datetime]:
332
+ """
333
+ Return the display value for the date and time this document was first published.
334
+
335
+ A value of 1970-01-01 00:00 indicates that the document has been published previously, but the exact date and time is unknown. In this case, return `None`. This can be used alongside `has_ever_been_published` to indicate an "unknown" state.
336
+
337
+ :return: The datetime value to be displayed to end users for "first published".
338
+ """
339
+
340
+ if self.first_published_datetime == datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc):
341
+ return None
342
+
343
+ return self.first_published_datetime
344
+
312
345
  @cached_property
313
346
  def has_ever_been_published(self) -> bool:
347
+ """
348
+ Do we consider this document to have ever been published?
349
+
350
+ This is `True` if either the document is currently published, or if `first_published_datetime` has any value (including the sentinel value).
351
+
352
+ :return: A boolean indicating if the document has ever been published.
353
+ """
314
354
  return self.is_published or self.first_published_datetime is not None
315
355
 
316
356
  @cached_property
@@ -325,6 +365,23 @@ class Document:
325
365
  def annotation(self) -> str:
326
366
  return self.api_client.get_version_annotation(self.uri)
327
367
 
368
+ @cached_property
369
+ def structured_annotation(self) -> AnnotationDataDict:
370
+ annotation_data_dict_loader = TypeAdapter(AnnotationDataDict)
371
+
372
+ return annotation_data_dict_loader.validate_json(self.annotation)
373
+
374
+ @cached_property
375
+ def has_unique_content_hash(self) -> bool:
376
+ """Check if the content hash of this document is unique compared to all other documents in MarkLogic."""
377
+ return self.api_client.has_unique_content_hash(self.uri)
378
+
379
+ @cached_property
380
+ def has_only_clean_assets(self) -> bool:
381
+ """False if any non-tar.gz assets associated with this document have not been cleaned."""
382
+ return True # TODO: Remove this once we have enabled the asset cleaning pipeline.
383
+ return are_unpublished_assets_clean(self.uri)
384
+
328
385
  @cached_property
329
386
  def version_created_datetime(self) -> datetime.datetime:
330
387
  return self.api_client.get_version_created_datetime(self.uri)
@@ -540,14 +597,14 @@ class Document:
540
597
  """
541
598
  Is it sensible to reparse this document?
542
599
  """
543
- return self.docx_exists()
600
+ return self.docx_exists() and not self.body.has_external_data
544
601
 
545
602
  @cached_property
546
603
  def can_enrich(self) -> bool:
547
604
  """
548
605
  Is it possible to enrich this document?
549
606
  """
550
- return self.body.has_content
607
+ return self.body.has_content and not self.body.has_external_data
551
608
 
552
609
  def validate_identifiers(self) -> SuccessFailureMessageTuple:
553
610
  return self.identifiers.perform_all_validations(document_type=type(self), api_client=self.api_client)
caselawclient/models/documents/body.py CHANGED
@@ -6,9 +6,11 @@ from typing import Optional
6
6
 
7
7
  import pytz
8
8
  from ds_caselaw_utils.types import CourtCode
9
+ from lxml import etree
9
10
  from saxonche import PySaxonProcessor
10
11
 
11
12
  from caselawclient.models.utilities.dates import parse_string_date_as_utc
13
+ from caselawclient.types import DocumentCategory
12
14
 
13
15
  from .xml import XML
14
16
 
@@ -37,6 +39,9 @@ class DocumentBody:
37
39
  def get_xpath_match_strings(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[str]:
38
40
  return self._xml.get_xpath_match_strings(xpath, namespaces)
39
41
 
42
+ def get_xpath_nodes(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[etree._Element]:
43
+ return self._xml.get_xpath_nodes(xpath, namespaces)
44
+
40
45
  @cached_property
41
46
  def name(self) -> str:
42
47
  return self.get_xpath_match_string(
@@ -51,9 +56,46 @@ class DocumentBody:
51
56
  def jurisdiction(self) -> str:
52
57
  return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()")
53
58
 
59
+ @cached_property
60
+ def categories(self) -> list[DocumentCategory]:
61
+ xpath = "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category"
62
+ nodes = self.get_xpath_nodes(xpath, DEFAULT_NAMESPACES)
63
+
64
+ categories: dict[str, DocumentCategory] = {}
65
+ children_map: dict[str, list[DocumentCategory]] = {}
66
+
67
+ for node in nodes:
68
+ name = node.text
69
+ if name is None or not name.strip():
70
+ continue
71
+
72
+ category = DocumentCategory(name=name)
73
+ categories[name] = category
74
+
75
+ parent = node.get("parent")
76
+
77
+ if parent:
78
+ children_map.setdefault(parent, []).append(category)
79
+
80
+ for parent, subcategories in children_map.items():
81
+ if parent in categories:
82
+ categories[parent].subcategories.extend(subcategories)
83
+
84
+ top_level_categories = [
85
+ categories[name]
86
+ for node in nodes
87
+ if node.get("parent") is None
88
+ if (name := node.text) and name in categories
89
+ ]
90
+
91
+ return top_level_categories
92
+
93
+ # NOTE: Deprecated - use categories function
54
94
  @cached_property
55
95
  def category(self) -> Optional[str]:
56
- return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category/text()")
96
+ return self.get_xpath_match_string(
97
+ "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category[not(@parent)][1]/text()"
98
+ )
57
99
 
58
100
  @cached_property
59
101
  def case_number(self) -> Optional[str]:
@@ -135,14 +177,16 @@ class DocumentBody:
135
177
  def has_content(self) -> bool:
136
178
  """If we do not have a word document, the XML will not contain
137
179
  the contents of the judgment, but will contain a preamble."""
138
- trailing_tags = self._xml.xml_as_tree.xpath("//*[preceding::akn:meta]", namespaces=DEFAULT_NAMESPACES)
139
- for tag in trailing_tags:
140
- if tag.tail and tag.tail.strip():
141
- return True
142
- if tag.text and tag.text.strip():
143
- return True
144
-
145
- return False
180
+ return bool(
181
+ self._xml.xml_as_tree.xpath("//akn:header[normalize-space(string(.))]", namespaces=DEFAULT_NAMESPACES)
182
+ )
183
+
184
+ @cached_property
185
+ def has_external_data(self) -> bool:
186
+ """Is there data which is not present within the source document:
187
+ is there a spreadsheet which has populated some fields. The current implementation
188
+ "is there a uk:party tag" is intended as a stopgap whilst we're not importing that data."""
189
+ return bool(self._xml.xml_as_tree.xpath("//uk:party", namespaces=DEFAULT_NAMESPACES))
146
190
 
147
191
  @cache
148
192
  def content_html(self, image_prefix: str) -> Optional[str]:
caselawclient/models/documents/versions.py ADDED
@@ -0,0 +1,114 @@
1
+ import json
2
+ from enum import Enum
3
+ from typing import Any, Optional, TypedDict
4
+
5
+ from typing_extensions import NotRequired
6
+
7
+
8
+ class AnnotationDataDict(TypedDict):
9
+ type: str
10
+ calling_function: str
11
+ calling_agent: str
12
+ message: NotRequired[str]
13
+ payload: NotRequired[dict[str, Any]]
14
+ automated: bool
15
+
16
+
17
+ class VersionType(Enum):
18
+ """Valid types of version."""
19
+
20
+ SUBMISSION = "submission"
21
+ """ This version has been created as a result of a submission of a new document. """
22
+
23
+ ENRICHMENT = "enrichment"
24
+ """ This version has been created through an enrichment process. """
25
+
26
+ EDIT = "edit"
27
+ """ This version has been created as the result of a manual edit. """
28
+
29
+
30
+ class VersionAnnotation:
31
+ """A class holding structured data about the reason for a version."""
32
+
33
+ def __init__(
34
+ self,
35
+ version_type: VersionType,
36
+ automated: bool,
37
+ message: Optional[str] = None,
38
+ payload: Optional[dict[str, Any]] = None,
39
+ ):
40
+ """
41
+ :param version_type: The type of version being created
42
+ :param automated: `True` if this action has happened as the result of an automated process, rather than a human
43
+ action
44
+ :param message: A human-readable string containing information about the version which can't be expressed in the
45
+ structured data.
46
+ :param payload: A dict containing additional information relevant to this version change
47
+ """
48
+ self.version_type = version_type
49
+ self.automated = automated
50
+ self.message = message
51
+ self.payload = payload
52
+
53
+ self.calling_function: Optional[str] = None
54
+ self.calling_agent: Optional[str] = None
55
+
56
+ def set_calling_function(self, calling_function: str) -> None:
57
+ """
58
+ Set the name of the calling function for tracing purposes
59
+
60
+ :param calling_function: The name of the function which is performing the database write
61
+ """
62
+ self.calling_function = calling_function
63
+
64
+ def set_calling_agent(self, calling_agent: str) -> None:
65
+ """
66
+ Set the name of the calling agent for tracing purposes
67
+
68
+ :param calling_agent: The name of the agent which is performing the database write
69
+ """
70
+ self.calling_agent = calling_agent
71
+
72
+ @property
73
+ def structured_annotation_dict(self) -> AnnotationDataDict:
74
+ """
75
+ :return: A structured dict representing this `VersionAnnotation`
76
+
77
+ :raises AttributeError: The name of the calling function has not been set; use `set_calling_function()`
78
+ :raises AttributeError: The name of the calling agent has not been set; use `set_calling_agent()`
79
+ """
80
+ if not self.calling_function:
81
+ raise AttributeError(
82
+ "The name of the calling function has not been set; use set_calling_function()",
83
+ )
84
+
85
+ if not self.calling_agent:
86
+ raise AttributeError(
87
+ "The name of the calling agent has not been set; use set_calling_agent()",
88
+ )
89
+
90
+ annotation_data: AnnotationDataDict = {
91
+ "type": self.version_type.value,
92
+ "calling_function": self.calling_function,
93
+ "calling_agent": self.calling_agent,
94
+ "automated": self.automated,
95
+ }
96
+
97
+ if self.message:
98
+ annotation_data["message"] = self.message
99
+
100
+ if self.payload:
101
+ annotation_data["payload"] = self.payload
102
+
103
+ return annotation_data
104
+
105
+ @property
106
+ def as_json(self) -> str:
107
+ """Render the structured annotation data as JSON, so it can be stored in the MarkLogic dls:annotation field.
108
+
109
+ :return: A JSON string representing this `VersionAnnotation`"""
110
+
111
+ return json.dumps(self.structured_annotation_dict)
112
+
113
+ def __str__(self) -> str:
114
+ return self.as_json
caselawclient/models/documents/xml.py CHANGED
@@ -2,7 +2,7 @@ import os
2
2
 
3
3
  from lxml import etree
4
4
 
5
- from caselawclient.xml_helpers import get_xpath_match_string, get_xpath_match_strings
5
+ from caselawclient.xml_helpers import get_xpath_match_string, get_xpath_match_strings, get_xpath_nodes
6
6
 
7
7
 
8
8
  def _xslt_path(xslt_file_name: str) -> str:
@@ -50,6 +50,9 @@ class XML:
50
50
  ) -> list[str]:
51
51
  return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
52
52
 
53
+ def get_xpath_nodes(self, xpath: str, namespaces: dict[str, str]) -> list[etree._Element]:
54
+ return get_xpath_nodes(self.xml_as_tree, xpath, namespaces)
55
+
53
56
  def _modified(
54
57
  self,
55
58
  xslt: str,
caselawclient/models/identifiers/__init__.py CHANGED
@@ -46,7 +46,10 @@ class IdentifierSchema(ABC):
46
46
  """ Should editors be allowed to manually manipulate identifiers under this schema? """
47
47
 
48
48
  require_globally_unique: bool = True
49
- """ Must this identifier be globally unique? """
49
+ """ Must this identifier be globally unique? (appear on no other documents) """
50
+
51
+ allow_multiple: bool = False
52
+ """ May documents have more than one non-deprecated identifier of this type? """
50
53
 
51
54
  document_types: Optional[list[str]] = None
52
55
  """
caselawclient/models/identifiers/collection.py CHANGED
@@ -43,6 +43,8 @@ class IdentifiersCollection(dict[str, Identifier]):
43
43
  """Check that only one non-deprecated identifier exists per schema where that schema does not allow multiples."""
44
44
 
45
45
  for schema, identifiers in self._list_all_identifiers_by_schema().items():
46
+ if schema.allow_multiple:
47
+ continue
46
48
  non_deprecated_identifiers = [i for i in identifiers if not i.deprecated]
47
49
  if len(non_deprecated_identifiers) > 1:
48
50
  return SuccessFailureMessageTuple(
caselawclient/models/utilities/aws.py CHANGED
@@ -2,6 +2,7 @@ import datetime
2
2
  import json
3
3
  import logging
4
4
  import uuid
5
+ from collections.abc import Callable
5
6
  from typing import Any, Literal, Optional, TypedDict, overload
6
7
 
7
8
  import boto3
@@ -118,11 +119,20 @@ def generate_pdf_url(uri: DocumentURIString) -> str:
118
119
 
119
120
 
120
121
  def delete_from_bucket(uri: DocumentURIString, bucket: str) -> None:
122
+ delete_some_from_bucket(uri=uri, bucket=bucket, filter=lambda x: True)
123
+
124
+
125
+ def delete_some_from_bucket(
126
+ uri: DocumentURIString, bucket: str, filter: Callable[[ObjectIdentifierTypeDef], bool]
127
+ ) -> None:
121
128
  client = create_s3_client()
122
129
  response = client.list_objects(Bucket=bucket, Prefix=uri_for_s3(uri))
123
130
 
124
131
  if response.get("Contents"):
125
- objects_to_delete: list[ObjectIdentifierTypeDef] = [{"Key": obj["Key"]} for obj in response.get("Contents", [])]
132
+ objects_to_maybe_delete: list[ObjectIdentifierTypeDef] = [
133
+ {"Key": obj["Key"]} for obj in response.get("Contents", [])
134
+ ]
135
+ objects_to_delete = [obj for obj in objects_to_maybe_delete if filter(obj)]
126
136
  client.delete_objects(
127
137
  Bucket=bucket,
128
138
  Delete={
@@ -131,6 +141,10 @@ def delete_from_bucket(uri: DocumentURIString, bucket: str) -> None:
131
141
  )
132
142
 
133
143
 
144
def delete_non_targz_from_bucket(uri: DocumentURIString, bucket: str) -> None:
    """Delete the objects listed for ``uri`` in ``bucket``, keeping any ``.tar.gz`` files."""

    def _is_not_targz(candidate: ObjectIdentifierTypeDef) -> bool:
        # Keys ending in ".tar.gz" are excluded from deletion.
        return not candidate["Key"].endswith(".tar.gz")

    delete_some_from_bucket(uri=uri, bucket=bucket, filter=_is_not_targz)
146
+
147
+
134
148
  def publish_documents(uri: DocumentURIString) -> None:
135
149
  """
136
150
  Copy assets from the unpublished bucket to the published one.
@@ -217,6 +231,26 @@ def copy_assets(old_uri: DocumentURIString, new_uri: DocumentURIString) -> None:
217
231
  )
218
232
 
219
233
 
234
def are_unpublished_assets_clean(uri: DocumentURIString) -> bool:
    """Report whether every non-``.tar.gz`` asset for ``uri`` has been cleaned.

    An asset counts as cleaned when it carries a ``DOCUMENT_PROCESSOR_VERSION``
    tag. Assets are listed from the bucket given by ``env("PRIVATE_ASSET_BUCKET")``
    using the S3 prefix derived from ``uri``.

    Returns:
        ``True`` if all listed assets are tagged, or if there are no assets at
        all; ``False`` as soon as one untagged asset is found.
    """
    client = create_s3_client()
    bucket = env("PRIVATE_ASSET_BUCKET")

    # A single list_objects call returns at most 1000 keys, so walk every page;
    # otherwise untagged assets beyond the first page would never be checked.
    paginator = client.get_paginator("list_objects")
    for page in paginator.paginate(Bucket=bucket, Prefix=uri_for_s3(uri)):
        for result in page.get("Contents", []):
            file_key = str(result["Key"])

            # ignore original tar.gz files
            if file_key.endswith(".tar.gz"):
                continue

            # an asset without the processor-version tag has not been cleaned
            tag_response = client.get_object_tagging(Bucket=bucket, Key=file_key)
            if not any(tag["Key"] == "DOCUMENT_PROCESSOR_VERSION" for tag in tag_response["TagSet"]):
                return False

    return True
252
+
253
+
220
254
  def build_new_key(old_key: str, new_uri: DocumentURIString) -> str:
221
255
  """Ensure that DOCX and PDF filenames are modified to reflect their new home
222
256
  as we get the name of the new S3 path"""
@@ -15,6 +15,8 @@ class SearchParameters:
15
15
  judge: Optional[str] = None
16
16
  party: Optional[str] = None
17
17
  neutral_citation: Optional[str] = None
18
+ document_name: Optional[str] = None
19
+ consignment_number: Optional[str] = None
18
20
  specific_keyword: Optional[str] = None
19
21
  order: Optional[str] = None
20
22
  date_from: Optional[str] = None
@@ -39,6 +41,8 @@ class SearchParameters:
39
41
  "q": str(self.query or ""),
40
42
  "party": str(self.party or ""),
41
43
  "neutral_citation": str(self.neutral_citation or ""),
44
+ "document_name": str(self.document_name or ""),
45
+ "consignment_number": str(self.consignment_number or ""),
42
46
  "specific_keyword": str(self.specific_keyword or ""),
43
47
  "order": str(self.order or ""),
44
48
  "from": str(self.date_from or ""),
caselawclient/types.py CHANGED
@@ -1,4 +1,10 @@
1
- from typing import NamedTuple
1
+ from dataclasses import dataclass, field
2
+
3
+
4
@dataclass
class DocumentCategory:
    """A named category which may contain nested subcategories."""

    # Name of this category.
    name: str
    # Child categories; each instance gets its own fresh list when none is supplied.
    subcategories: list["DocumentCategory"] = field(default_factory=list)
2
8
 
3
9
 
4
10
  class InvalidDocumentURIException(Exception):
@@ -63,9 +69,40 @@ class DocumentIdentifierValue(str):
63
69
  pass
64
70
 
65
71
 
66
- SuccessFailureMessageTuple = NamedTuple("SuccessFailureMessageTuple", [("success", bool), ("messages", list[str])])
67
- """
68
- A tuple used to return if an operation has succeeded or failed (and optionally a list of messages associated with that operation).
72
class SuccessFailureMessageTuple(tuple[bool, list[str]]):
    """
    Return whether an operation has succeeded or failed
    (and optionally a list of messages associated with that operation).
    Typically the messages will be exposed to the end-user.
    Use only where a failure is a routine event (such as during validation).

    Instances are two-element tuples ``(success, messages)``. Their truthiness
    follows ``success``, and two results can be merged with ``|``.
    """

    def __new__(cls, success: bool, messages: list[str]) -> "SuccessFailureMessageTuple":
        return super().__new__(cls, (success, messages))

    def __getnewargs__(self) -> tuple[bool, list[str]]:
        """Supply the two constructor arguments for ``copy`` and ``pickle``.

        The inherited tuple implementation hands back the whole tuple as a single
        argument, which does not match this class's two-argument ``__new__``.
        """
        return (self.success, self.messages)

    @property
    def success(self) -> bool:
        """Whether the operation succeeded."""
        return self[0]

    @property
    def messages(self) -> list[str]:
        """Messages associated with the operation."""
        return self[1]

    def __repr__(self) -> str:
        return f"SuccessFailureMessageTuple({self.success!r}, {self.messages!r})"

    def __bool__(self) -> bool:
        # A plain two-element tuple is always truthy; here truthiness mirrors success.
        return self.success

    def __or__(self, other: "SuccessFailureMessageTuple") -> "SuccessFailureMessageTuple":
        """Allows us to write combined_tuple = first_tuple | second_tuple

        The combined result succeeds only if both operands succeeded, and carries
        the messages of both (left operand's first).
        """
        if not isinstance(other, SuccessFailureMessageTuple):
            # Let Python raise its standard TypeError for unsupported operands.
            return NotImplemented
        return SuccessFailureMessageTuple(self.success and other.success, self.messages + other.messages)
100
+
101
+
102
def SuccessTuple() -> SuccessFailureMessageTuple:
    """Build a successful result that carries no messages."""
    return SuccessFailureMessageTuple(success=True, messages=[])
104
+
69
105
 
70
- This should only be used where a failure is considered a routine part of the application (eg during validation options); where an unexpected action has led to a failure the application should raise an appropriate exception.
71
- """
106
def FailureTuple(message: str | list[str]) -> SuccessFailureMessageTuple:
    """Build a failed result from either one message or a list of messages."""
    if isinstance(message, list):
        return SuccessFailureMessageTuple(success=False, messages=message)
    # A lone message is wrapped so that ``messages`` is always a list.
    return SuccessFailureMessageTuple(success=False, messages=[message])
@@ -7,9 +7,25 @@ DEFAULT_NAMESPACES = {
7
7
  "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
8
8
  }
9
9
 
10
+ # _Element is the only class lxml exposes, so need to use the private class for typing
11
+ Element = etree._Element # noqa: SLF001
12
+
13
+
14
def get_xpath_nodes(
    node: Element,
    path: str,
    namespaces: Optional[Dict[str, str]] = None,
) -> list[Element]:
    """Evaluate the XPath ``path`` against ``node`` and return the matching elements.

    Raises:
        TypeError: if the XPath result is not a list, or is a list containing
            anything other than elements.
    """
    result = node.xpath(path, namespaces=namespaces)

    if not isinstance(result, list):
        raise TypeError(f"Expected to return list[Element], got {type(result).__name__}")

    # Name the offending item types: reporting just "list" hides what went wrong.
    unexpected = sorted({type(item).__name__ for item in result if not isinstance(item, Element)})
    if unexpected:
        raise TypeError(f"Expected to return list[Element], got list containing {', '.join(unexpected)}")

    return result
25
+
10
26
 
11
27
  def get_xpath_match_string(
12
- node: etree._Element,
28
+ node: Element,
13
29
  path: str,
14
30
  namespaces: Optional[Dict[str, str]] = None,
15
31
  fallback: str = "",
@@ -18,7 +34,7 @@ def get_xpath_match_string(
18
34
 
19
35
 
20
36
  def get_xpath_match_strings(
21
- node: etree._Element,
37
+ node: Element,
22
38
  path: str,
23
39
  namespaces: Optional[Dict[str, str]] = None,
24
40
  ) -> list[str]:
@@ -0,0 +1,15 @@
1
xquery version "1.0-ml";
declare namespace akn = "http://docs.oasis-open.org/legaldocml/ns/akn/3.0";
declare namespace uk = "https://caselaw.nationalarchives.gov.uk/akn";
declare variable $uri as xs:string external;

(: Returns true when exactly one URI in the DLS latest-version collection has a
   uk:hash element value matching the uk:hash text of the document at $uri.
   Presumably that single match is the document itself; verify against callers.
   NOTE(review): if the document has no uk:hash element then $hash is the empty
   sequence; confirm how cts:element-value-query behaves in that case. :)
let $doc := doc($uri)
let $hash := $doc//uk:hash/text()
let $count := count(cts:uris(
  (), (),
  cts:and-query((
    cts:element-value-query(xs:QName("uk:hash"), $hash),
    cts:collection-query("http://marklogic.com/collections/dls/latest-version")
  ))
))
return $count = 1
@@ -11,7 +11,7 @@ declare variable $version_uri as xs:string? external;
11
11
  declare variable $search_query as xs:string? external;
12
12
 
13
13
  (: Note that `xsl:output method` is changed from `html` to `xml` and we've namespaced the tag :)
14
- let $number_marks_xslt := (
14
+ let $delete_meta_marks_xslt := (
15
15
  <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
16
16
  xmlns:uk="https://caselaw.nationalarchives.gov.uk/akn"
17
17
  xmlns:akn="http://docs.oasis-open.org/legaldocml/ns/akn/3.0"
@@ -25,16 +25,6 @@ let $number_marks_xslt := (
25
25
  <xsl:template match="//akn:meta//uk:mark">
26
26
  <xsl:apply-templates />
27
27
  </xsl:template>
28
- <xsl:template match="uk:mark">
29
- <xsl:copy>
30
- <xsl:copy-of select="@*" />
31
- <xsl:attribute name="id">
32
- <xsl:text>mark_</xsl:text>
33
- <xsl:number count="//uk:mark" level="any" from="//*[ancestor::akn:meta]" />
34
- </xsl:attribute>
35
- <xsl:apply-templates />
36
- </xsl:copy>
37
- </xsl:template>
38
28
  </xsl:stylesheet>
39
29
  )
40
30
 
@@ -56,7 +46,7 @@ let $raw_xml := if ($show_unpublished) then
56
46
  (: If a search query string is present, highlight instances :)
57
47
  let $transformed := if($search_query) then
58
48
  xdmp:xslt-eval(
59
- $number_marks_xslt,
49
+ $delete_meta_marks_xslt,
60
50
  cts:highlight(
61
51
  $raw_xml,
62
52
  helper:make-q-query($search_query),
@@ -0,0 +1,26 @@
1
xquery version "1.0-ml";

(: Target parser version that rows are compared against, bound into the SQL below. :)
declare variable $target_major_version as xs:int external;
declare variable $target_minor_version as xs:int external;

(: Returns, as JSON, a COUNT(*) over documents.process_data joined to
   documents.process_property_data on uri. A row is counted when:
     - parser_version_string is NULL, or the major/minor comparison below holds; and
     - minutes_since_parse_request is NULL or greater than 43200 (30 days).
   NOTE(review): the version comparison requires the minor version to be lower
   than the target minor even when the major version is lower, so for example a
   row at 1.5 is not counted for target 2.0. Looks unintended; confirm, and keep
   it in step with the matching "documents" query if it is changed. :)
xdmp:to-json(xdmp:sql(
  "SELECT COUNT(*) as count
  FROM (
    SELECT
      process_data.uri,
      parser_version_string, parser_major_version, parser_minor_version,
      DATEDIFF('minute', last_sent_to_parser, CURRENT_TIMESTAMP) AS minutes_since_parse_request
    FROM documents.process_data
    JOIN documents.process_property_data ON process_data.uri = process_property_data.uri
  )
  WHERE (
    (parser_version_string IS NULL) OR
    (parser_major_version <= @target_major_version AND parser_minor_version < @target_minor_version)
  )
  AND (minutes_since_parse_request > 43200 OR minutes_since_parse_request IS NULL)",
  "array",
  map:new((
    map:entry("target_major_version", $target_major_version),
    map:entry("target_minor_version", $target_minor_version)
  ))
))
@@ -19,7 +19,7 @@ xdmp:to-json(xdmp:sql(
19
19
  (parser_major_version <= @target_major_version AND parser_minor_version < @target_minor_version)
20
20
  )
21
21
  AND (minutes_since_parse_request > 43200 OR minutes_since_parse_request IS NULL)
22
- ORDER BY parser_major_version ASC NULLS FIRST, parser_minor_version ASC
22
+ ORDER BY parser_major_version ASC NULLS FIRST, parser_minor_version ASC, minutes_since_parse_request DESC
23
23
  LIMIT @maximum_records",
24
24
  "array",
25
25
  map:new((
@@ -28,4 +28,4 @@ xdmp:to-json(xdmp:sql(
28
28
  map:entry("maximum_records", $maximum_records)
29
29
 
30
30
  ))
31
- ))
31
+ ))
@@ -17,27 +17,6 @@ let $xsl_path := fn:concat("judgments/xslts/", $xsl_filename)
17
17
 
18
18
  let $params := map:map()
19
19
 
20
- let $number_marks_xslt := (
21
- <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
22
- version="2.0">
23
- <xsl:output method="html" />
24
- <xsl:template match="@*|node()">
25
- <xsl:copy>
26
- <xsl:apply-templates select="@*|node()"/>
27
- </xsl:copy>
28
- </xsl:template>
29
- <xsl:template match="mark">
30
- <xsl:copy>
31
- <xsl:copy-of select="*" />
32
- <xsl:attribute name="id">
33
- <xsl:text>mark_</xsl:text>
34
- <xsl:value-of select="count(preceding::mark)"/>
35
- </xsl:attribute>
36
- <xsl:apply-templates />
37
- </xsl:copy>
38
- </xsl:template>
39
- </xsl:stylesheet>
40
- )
41
20
  (: change the image-base of the document to match the location of the assets in $image_base
42
21
  so that references to images point to the correct places on the internet :)
43
22
  let $_put := map:put(
@@ -59,13 +38,10 @@ let $retrieved_value := if (xs:boolean($is_published) or $show_unpublished) then
59
38
  ()
60
39
 
61
40
  let $return_value := if($query) then
62
- xdmp:xslt-eval(
63
- $number_marks_xslt,
64
- cts:highlight(
65
- $retrieved_value,
66
- helper:make-q-query($query),
67
- <mark>{$cts:text}</mark>
68
- )
41
+ cts:highlight(
42
+ $retrieved_value,
43
+ helper:make-q-query($query),
44
+ <mark>{$cts:text}</mark>
69
45
  )
70
46
  else
71
47
  $retrieved_value
@@ -23,6 +23,11 @@ class BreakJudgmentCheckoutDict(MarkLogicAPIDict):
23
23
  uri: MarkLogicDocumentURIString
24
24
 
25
25
 
26
+ # check_content_hash_unique_by_uri.xqy
27
class CheckContentHashUniqueByUriDict(MarkLogicAPIDict):
    # Variables for check_content_hash_unique_by_uri.xqy, which declares a single
    # external string variable named `uri`.
    uri: MarkLogicDocumentURIString
29
+
30
+
26
31
  # checkin_judgment.xqy
27
32
  class CheckinJudgmentDict(MarkLogicAPIDict):
28
33
  uri: MarkLogicDocumentURIString
@@ -100,8 +105,14 @@ class GetPendingEnrichmentForVersionDict(MarkLogicAPIDict):
100
105
  target_parser_minor_version: int
101
106
 
102
107
 
103
- # get_pending_parse_for_version.xqy
104
- class GetPendingParseForVersionDict(MarkLogicAPIDict):
108
+ # get_pending_parse_for_version_count.xqy
109
class GetPendingParseForVersionCountDict(MarkLogicAPIDict):
    # Variables for get_pending_parse_for_version_count.xqy: the target parser
    # version that stored parser versions are compared against.
    target_major_version: int
    target_minor_version: int
112
+
113
+
114
+ # get_pending_parse_for_version_documents.xqy
115
+ class GetPendingParseForVersionDocumentsDict(MarkLogicAPIDict):
105
116
  maximum_records: Optional[int]
106
117
  target_major_version: int
107
118
  target_minor_version: int
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ds-caselaw-marklogic-api-client
3
- Version: 40.0.0
3
+ Version: 44.0.3
4
4
  Summary: An API client for interacting with the underlying data in Find Caselaw.
5
5
  Keywords: national archives,caselaw
6
6
  Author: The National Archives
@@ -9,7 +9,7 @@ Classifier: Programming Language :: Python :: 3
9
9
  Classifier: Programming Language :: Python :: 3.12
10
10
  Classifier: Programming Language :: Python :: 3.13
11
11
  Requires-Dist: boto3 (>=1.26.112,<2.0.0)
12
- Requires-Dist: certifi (>=2025.8.3,<2025.9.0)
12
+ Requires-Dist: certifi (>=2025.11.12,<2025.12.0)
13
13
  Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
14
14
  Requires-Dist: defusedxml (>=0.7.1,<0.8.0)
15
15
  Requires-Dist: django-environ (>=0.12.0)
@@ -19,6 +19,7 @@ Requires-Dist: lxml (>=6.0.0,<7.0.0)
19
19
  Requires-Dist: memoization (>=0.4.0,<0.5.0)
20
20
  Requires-Dist: mypy-boto3-s3 (>=1.26.104,<2.0.0)
21
21
  Requires-Dist: mypy-boto3-sns (>=1.26.69,<2.0.0)
22
+ Requires-Dist: pydantic (>=2.12.3,<3.0.0)
22
23
  Requires-Dist: python-dateutil (>=2.9.0-post.0,<3.0.0)
23
24
  Requires-Dist: pytz (>2024)
24
25
  Requires-Dist: requests (>=2.28.2,<3.0.0)
@@ -1,21 +1,25 @@
1
- caselawclient/Client.py,sha256=tof_riZN5c1x3_cYScfIDiGaPIYt1EuAtKOpg3qI_cg,46880
1
+ caselawclient/Client.py,sha256=aKe_SvSXpSIgHCc53DfQY6mgK4QyrUDqyPWLByzGxLI,48138
2
2
  caselawclient/__init__.py,sha256=QZtsOB_GR5XfFnWMJ6E9_fBany-JXFIrQmzs1mD_KVg,1225
3
- caselawclient/client_helpers/__init__.py,sha256=eucyUXwUqI72TPw-C5zLcHlMu4GtFY507a6lQc03lQY,5053
3
+ caselawclient/client_helpers/__init__.py,sha256=tpXWjwBAqOf8ChtSiEeMhdkiO7tVbfZ4FfQXsXaGJlI,1221
4
4
  caselawclient/client_helpers/search_helpers.py,sha256=R99HyRLeYHgsw2L3DOidEqlKLLvs6Tga5rKTuWQViig,1525
5
5
  caselawclient/content_hash.py,sha256=0cPC4OoABq0SC2wYFX9-24DodNigeOqksDxgxQH_hUA,2221
6
6
  caselawclient/errors.py,sha256=JC16fEGq_MRJX-_KFzfINCV2Cqx8o6OWOt3C16rQd84,3142
7
- caselawclient/factories.py,sha256=eGj9TiZpmF3todW-08Ps7bHNMvByHqwEbgujRhvU_Yc,7382
7
+ caselawclient/factories.py,sha256=5AiRrvtnvCkvHLU9SzD9MRZPKspEomFw9M2LDZjveJY,7501
8
8
  caselawclient/identifier_resolution.py,sha256=B5I1sD7o7YjzsXMECjbKjgiGLDda5bGhejsJ-lYpTIg,2429
9
+ caselawclient/managers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ caselawclient/managers/merge/__init__.py,sha256=Rd6YIGifT3TP6UOf0gBrRoYzK5MJqTPeRaIJznS8dgI,2078
11
+ caselawclient/managers/merge/checks.py,sha256=J9RBG6jZAKIZk27jdFq-BByoRLKWsViCfHDyA8ZM3qU,3205
9
12
  caselawclient/models/__init__.py,sha256=kd23EUpvaC7aLHdgk8farqKAQEx3lf7RvNT2jEatvlg,68
10
- caselawclient/models/documents/__init__.py,sha256=ugbklabbI1LcoN5mE6rffYDApUOtkTXBzYH3Rv2fhBE,22667
11
- caselawclient/models/documents/body.py,sha256=O1ZTV3KHo-YNi7Syd4oCV1CVSuRF7mcLXojwshyY4jg,6601
13
+ caselawclient/models/documents/__init__.py,sha256=nC1CoYUDVAlAgLT4vWdqmNj3yrpoUs7C73H588_PTSA,25185
14
+ caselawclient/models/documents/body.py,sha256=IGRJcolJlkW_KWufNy4lLB97uPf9Ghhn6i_s0sptUrk,8286
12
15
  caselawclient/models/documents/comparison.py,sha256=KwFZQByOcYcZKe8csjAntttACKq4BZb28n2VeV5rK54,1355
13
16
  caselawclient/models/documents/exceptions.py,sha256=te7PPQTDHjZ9EYVg5pVaiZfF00lMBFy333PHj8_mkC4,443
14
17
  caselawclient/models/documents/statuses.py,sha256=Cp4dTQmJOtsU41EJcxy5dV1841pGD2PNWH0VrkDEv4Q,579
15
18
  caselawclient/models/documents/transforms/html.xsl,sha256=XyUQLFcJ7_GwthWQ6ShU0bmzrgpl7xDFU-U8VLgOvEs,38258
16
- caselawclient/models/documents/xml.py,sha256=BVra2VL_0JyImM8GC3wdouu1tApy79C-e2dHvQyrXPE,2195
17
- caselawclient/models/identifiers/__init__.py,sha256=bcXiree1FKIlJklWggvS_IKMHMppAjDbadOpxCJx3yw,7727
18
- caselawclient/models/identifiers/collection.py,sha256=kGlziJiLAqoyd6LaaZ5tsgUf2fD6Y-7fv1It9S4-Otw,7448
19
+ caselawclient/models/documents/versions.py,sha256=fyDNKCdrTb2N0Ks23YDhmvlXKfLTHnYQCXhnZb-QQbg,3832
20
+ caselawclient/models/documents/xml.py,sha256=uGRULm_XcA9ABZmwTxxwwysPItQl1qnMd2pUVTZprgc,2376
21
+ caselawclient/models/identifiers/__init__.py,sha256=Vp5zJdJSskCuUOUwmPDiDvVlNsYmPRH350-wRx7Q8Dc,7877
22
+ caselawclient/models/identifiers/collection.py,sha256=1fw9yAuHBBMCgAfYRwgpoIPHW_vWQA-eCGDBnWI-gWI,7511
19
23
  caselawclient/models/identifiers/exceptions.py,sha256=6LVjvx-UOwqkrpxU19ydmrphKNw0rcG5GXwjTFyf8Dk,130
20
24
  caselawclient/models/identifiers/fclid.py,sha256=hj8z-VhXFrUHKOY6k_ItPvOakIvbhJ5xEbZ04E2j7t8,1521
21
25
  caselawclient/models/identifiers/neutral_citation.py,sha256=bYAeXHVm_ls0aDTeYI4uv35iZmJGSKU4-H-iLh2xED0,2912
@@ -26,7 +30,7 @@ caselawclient/models/neutral_citation_mixin.py,sha256=jAac3PPuWyPdj9N-n-U_Jfwkbg
26
30
  caselawclient/models/parser_logs.py,sha256=iOhKTAAi87XQvxz1DHjF2lrqScD19g_c8EjSf0vPdfs,364
27
31
  caselawclient/models/press_summaries.py,sha256=rtrYs_3BazUXxdA2oYmIJ6YIAiVlKeyc1aSF9uvkJJU,2196
28
32
  caselawclient/models/utilities/__init__.py,sha256=LPhyrQwLKc5tIJUO8Bysn9wCiR6Z6jMMTksjOV4JH9U,1041
29
- caselawclient/models/utilities/aws.py,sha256=_JUoJatfC1m_etT5MDwbCrOHpdqqTRqGCyzzNiW1WRA,8660
33
+ caselawclient/models/utilities/aws.py,sha256=s86_kOpnyc-seefy7eZQsnE4v3b5TZCAVhESiDEpqx8,10084
30
34
  caselawclient/models/utilities/dates.py,sha256=WwORxVjUHM1ZFcBF6Qtwo3Cj0sATsnSECkUZ6ls1N1Q,492
31
35
  caselawclient/models/utilities/move.py,sha256=MXdUqkSiyqRb8YKs_66B6ICWn8EWM6DiJV95fuJO1Us,3610
32
36
  caselawclient/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -34,10 +38,11 @@ caselawclient/responses/__init__.py,sha256=2-5NJn_PXPTje_W4dHeHYaNRN6vXK4UcB9eLL
34
38
  caselawclient/responses/search_response.py,sha256=Z76Zj4VvM-EV_vdiehv2-Jfkr9HZD3SvCTlRrUB_cyE,1951
35
39
  caselawclient/responses/search_result.py,sha256=glcoCeo2xO-17aV2pcpyfgl0_UjjEUqHfm2kVylXCwk,9782
36
40
  caselawclient/responses/xsl/search_match.xsl,sha256=4Sv--MrwBd7J48E9aI7jlFSXGlNi4dBqgzJ3bdMJ_ZU,1018
37
- caselawclient/search_parameters.py,sha256=A-9icXdyFYLDACjUaRQF8mrnaVRlFJ9XCPtu5uZ-_Lo,3484
38
- caselawclient/types.py,sha256=5lE_0kRd7ZMUkr53Rh4ooo74Ab8JTXnMo8rNrwn_HUo,2578
39
- caselawclient/xml_helpers.py,sha256=xCboRhhzezqh-VyoKBQwal5lwxv96vTMJyVFWJNN-ok,639
41
+ caselawclient/search_parameters.py,sha256=BQzDrfxqyZTkqgNj8Ruy-lSr_m4bYrUzUKrqCxB8GrM,3699
42
+ caselawclient/types.py,sha256=Q1RE2HMZUGKTuDzg9wARq9AuVaIIlDyYFE4tqhpcN_U,3630
43
+ caselawclient/xml_helpers.py,sha256=31cxsDu680SFi3gR35rL7EdBZaW6r6mt4zvWHjJeX9o,1131
40
44
  caselawclient/xquery/break_judgment_checkout.xqy,sha256=rISzoBKxQKrP5ZRdCSoRqOXW8T_NDBSZRFjOXo_H3ns,220
45
+ caselawclient/xquery/check_content_hash_unique_by_uri.xqy,sha256=kXfJL0sclcCulsaw5KGgVCyuiIfINkSeMwFGXSvuYME,494
41
46
  caselawclient/xquery/checkin_judgment.xqy,sha256=QeGqO3kL-q0UrjopCVU0lCbkwbyoc5SuNLYFAIbbyMg,197
42
47
  caselawclient/xquery/checkout_judgment.xqy,sha256=aRwVo4KXoEKXfXRZ6IrVfvh0pXK-7pFxVIgEyzE5DRY,385
43
48
  caselawclient/xquery/copy_document.xqy,sha256=GwgafibZhUB4rZ7x5wmHAKi0DO1aEWNVithkguwsVGE,453
@@ -48,14 +53,15 @@ caselawclient/xquery/get_combined_stats_table.xqy,sha256=cclNqSzIB6sX6A_hgVOFZon
48
53
  caselawclient/xquery/get_components_for_document.xqy,sha256=qBOn5OI7ThK0OHizSm68oySfAdp2dsHFJaIMTI4iIC0,769
49
54
  caselawclient/xquery/get_highest_enrichment_version.xqy,sha256=a0dwVmEZuIMyRjIlvenSmbOaaN0WvgaCZvMtVWoLulQ,247
50
55
  caselawclient/xquery/get_highest_parser_version.xqy,sha256=LW3iSg4eWArbfBaCVWWOpr4MoUcDBz514nV48ElOsAM,247
51
- caselawclient/xquery/get_judgment.xqy,sha256=34fMHSBNPcXY3u3QIX3Bul8DMuWyGGHrhuYTzxGt3wk,2420
56
+ caselawclient/xquery/get_judgment.xqy,sha256=YMDDTOvT2FaYXqhYPcJVNn95czDoDojauVJiSj1bVG0,2074
52
57
  caselawclient/xquery/get_judgment_checkout_status.xqy,sha256=mdY9UXLyzQdB7byEERPqentlr0YDLbXRVqH0h4UuZTQ,193
53
58
  caselawclient/xquery/get_judgment_version.xqy,sha256=wF9k9-CBrqo8VbxxyTrD-AGzR3-3jMm25tRVCjxPLrU,292
54
59
  caselawclient/xquery/get_last_modified.xqy,sha256=8fCm_7o_kkytCEmEeSTLWzLP7iOjuPV01IfHDgf6HaQ,172
55
60
  caselawclient/xquery/get_missing_fclid.xqy,sha256=FAZZMtqow0VAf1D9LjBydT9kcOxiEIQC7GQgs4o68sA,520
56
61
  caselawclient/xquery/get_next_document_sequence_number.xqy,sha256=LkGoaS7jZfaKDuZLi0apP5qHP1rpcM1HbqX3RUwquKY,450
57
62
  caselawclient/xquery/get_pending_enrichment_for_version.xqy,sha256=8J5Pi-jMXJd_BgtpK4g6C9uR99HP57JpFv5WkoPfNuo,2016
58
- caselawclient/xquery/get_pending_parse_for_version.xqy,sha256=9cjVZtHeBBjm-a7RMsn1PVJt_Ug78YFlmp5CN8VJ1jQ,1197
63
+ caselawclient/xquery/get_pending_parse_for_version_count.xqy,sha256=8OrpwjfW3daG1wPox9Xyc5A6kqRfKFQZ8ogTqB8VIPA,928
64
+ caselawclient/xquery/get_pending_parse_for_version_documents.xqy,sha256=g5r5Lnyi3r7eNOnX0eIYRTVBZjPd43KQRyrIAS8pIZY,1232
59
65
  caselawclient/xquery/get_properties_for_search_results.xqy,sha256=Tlv3EKwVV_q-JyQyhjWVHIleicPDpucxP4ScuQjpgSw,625
60
66
  caselawclient/xquery/get_property.xqy,sha256=RHlOTrK0aH-S7s_ykYzGmUeKOJxXlI4vE5sKRt556NY,209
61
67
  caselawclient/xquery/get_property_as_node.xqy,sha256=7EXNgjVD1QugJ1621pvg8PdjBRIuh7GugwARv04TuBk,202
@@ -84,11 +90,11 @@ caselawclient/xquery/user_has_role.xqy,sha256=52YuFZnXqaDDJs-j_UanpqcLNEiw_m9xb0
84
90
  caselawclient/xquery/validate_all_documents.xqy,sha256=z_0YEXmRcZ-FaJM0ouKiTjdI4tLKQ4FTssRihR07qFk,156
85
91
  caselawclient/xquery/validate_document.xqy,sha256=PgaDcnqCRJPIVqfmWsNlXmCLNKd21qkJrvY1RtNP7eA,140
86
92
  caselawclient/xquery/xslt.xqy,sha256=w57wNijH3dkwHkpKeAxqjlghVflQwo8cq6jS_sm-erM,199
87
- caselawclient/xquery/xslt_transform.xqy,sha256=cccaFiGkCcvSfDv007UriZ3I4ak2nTLP1trRZdbOoS8,2462
88
- caselawclient/xquery_type_dicts.py,sha256=ROwV-BpQYDOrtW6aKx5MXVi85SW5J5oGgyQT7-Ra1sA,6633
93
+ caselawclient/xquery/xslt_transform.xqy,sha256=3X8f7u5kRXKRMwnfZ2AO60LS9F3Gi3mFp_MHrw95x0w,1745
94
+ caselawclient/xquery_type_dicts.py,sha256=caNLrQBytQFxfdVs5gpSTQEo-FEldKITZDqZtITKWJQ,6950
89
95
  caselawclient/xslt/modify_xml_live.xsl,sha256=gNjwBun2-UzOeeuf0wNjFtN3jXm1yrwqv_KT8r1slXw,2370
90
96
  caselawclient/xslt/sample.xsl,sha256=IG-v77stjwqiw25pguh391K-5DTKiX651WqILDZixm0,825
91
- ds_caselaw_marklogic_api_client-40.0.0.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
92
- ds_caselaw_marklogic_api_client-40.0.0.dist-info/METADATA,sha256=Nxrgxi6sCKcfetI5h5k0PcwLkkkUQYs2e8nbtj-PUQY,4364
93
- ds_caselaw_marklogic_api_client-40.0.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
94
- ds_caselaw_marklogic_api_client-40.0.0.dist-info/RECORD,,
97
+ ds_caselaw_marklogic_api_client-44.0.3.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
98
+ ds_caselaw_marklogic_api_client-44.0.3.dist-info/METADATA,sha256=32qweXun6XgSOZhqDF4y-wKtPV9y90LrQsrL-IaSVv8,4409
99
+ ds_caselaw_marklogic_api_client-44.0.3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
100
+ ds_caselaw_marklogic_api_client-44.0.3.dist-info/RECORD,,