ds-caselaw-marklogic-api-client 27.0.1__py3-none-any.whl → 27.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
caselawclient/Client.py CHANGED
@@ -403,6 +403,7 @@ class MarklogicApiClient:
403
403
  judgment_uri: DocumentURIString,
404
404
  version_uri: Optional[DocumentURIString] = None,
405
405
  show_unpublished: bool = False,
406
+ search_query: Optional[str] = None,
406
407
  ) -> bytes:
407
408
  marklogic_document_uri = self._format_uri_for_marklogic(judgment_uri)
408
409
  marklogic_document_version_uri = (
@@ -418,6 +419,7 @@ class MarklogicApiClient:
418
419
  "uri": marklogic_document_uri,
419
420
  "version_uri": marklogic_document_version_uri,
420
421
  "show_unpublished": show_unpublished,
422
+ "search_query": search_query,
421
423
  }
422
424
 
423
425
  response = self._eval_as_bytes(vars, "get_judgment.xqy")
@@ -433,11 +435,13 @@ class MarklogicApiClient:
433
435
  judgment_uri: DocumentURIString,
434
436
  version_uri: Optional[DocumentURIString] = None,
435
437
  show_unpublished: bool = False,
438
+ search_query: Optional[str] = None,
436
439
  ) -> str:
437
440
  return self.get_judgment_xml_bytestring(
438
441
  judgment_uri,
439
442
  version_uri,
440
443
  show_unpublished,
444
+ search_query=search_query,
441
445
  ).decode(encoding="utf-8")
442
446
 
443
447
  def set_document_name(
@@ -634,12 +638,14 @@ class MarklogicApiClient:
634
638
  judgment_uri: DocumentURIString,
635
639
  annotation: str = "",
636
640
  expires_at_midnight: bool = False,
641
+ timeout_seconds: int = -1,
637
642
  ) -> requests.Response:
643
+ """If timeout_seconds is -1, the lock never times out"""
638
644
  uri = self._format_uri_for_marklogic(judgment_uri)
639
645
  vars: query_dicts.CheckoutJudgmentDict = {
640
646
  "uri": uri,
641
647
  "annotation": annotation,
642
- "timeout": -1,
648
+ "timeout": timeout_seconds,
643
649
  }
644
650
 
645
651
  if expires_at_midnight:
@@ -1,5 +1,5 @@
1
1
  import datetime
2
- from typing import Any, Optional, cast
2
+ from typing import Any, Optional
3
3
  from unittest.mock import Mock
4
4
 
5
5
  from typing_extensions import TypeAlias
@@ -75,55 +75,24 @@ class DocumentFactory:
75
75
 
76
76
 
77
77
  class JudgmentFactory(DocumentFactory):
78
- target_class = Judgment
79
-
80
- def __init__(self) -> None:
81
- self.PARAMS_MAP = self.PARAMS_MAP | {
82
- "neutral_citation": "[2023] Test 123",
83
- "best_human_identifier": "[2023] Test 123",
84
- }
85
-
86
- super().__init__()
87
-
88
- @classmethod
89
- def build(
90
- cls,
91
- uri: str = "test/2023/123",
92
- html: str = "<p>This is a judgment.</p>",
93
- api_client: Optional[MarklogicApiClient] = None,
94
- **kwargs: Any,
95
- ) -> Judgment:
96
- return cast(Judgment, super().build(uri, html, api_client, **kwargs))
78
+ target_class: TypeAlias = Judgment
79
+ PARAMS_MAP = DocumentFactory.PARAMS_MAP | {
80
+ "neutral_citation": "[2023] Test 123",
81
+ }
97
82
 
98
83
 
99
84
  class PressSummaryFactory(DocumentFactory):
100
- target_class = PressSummary
101
-
102
- def __init__(self) -> None:
103
- self.PARAMS_MAP = self.PARAMS_MAP | {
104
- "neutral_citation": "[2023] Test 123",
105
- "best_human_identifier": "[2023] Test 123",
106
- }
107
-
108
- super().__init__()
109
-
110
- @classmethod
111
- def build(
112
- cls,
113
- uri: str = "test/2023/123/press-summary/1",
114
- html: str = "<p>This is a judgment.</p>",
115
- api_client: Optional[MarklogicApiClient] = None,
116
- **kwargs: Any,
117
- ) -> PressSummary:
118
- return cast(PressSummary, super().build(uri, html, api_client, **kwargs))
85
+ target_class: TypeAlias = PressSummary
86
+ PARAMS_MAP = DocumentFactory.PARAMS_MAP | {
87
+ "neutral_citation": "[2023] Test 123",
88
+ }
119
89
 
120
90
 
121
91
  class SimpleFactory:
92
+ target_class: TypeAlias = object
122
93
  # "name_of_attribute": "default value"
123
94
  PARAMS_MAP: dict[str, Any]
124
95
 
125
- target_class: TypeAlias = object
126
-
127
96
  @classmethod
128
97
  def build(cls, **kwargs: Any) -> target_class:
129
98
  mock_object = Mock(spec=cls.target_class, autospec=True)
@@ -145,6 +114,7 @@ class SearchResultMetadataFactory(SimpleFactory):
145
114
  "author_email": "fake.email@gov.invalid",
146
115
  "consignment_reference": "TDR-2023-ABC",
147
116
  "submission_datetime": datetime.datetime(2023, 2, 3, 9, 12, 34),
117
+ "editor_status": "New",
148
118
  }
149
119
 
150
120
 
@@ -105,7 +105,7 @@ class Document:
105
105
  Individual document classes should extend this list where necessary to validate document type-specific attributes.
106
106
  """
107
107
 
108
- def __init__(self, uri: str, api_client: "MarklogicApiClient"):
108
+ def __init__(self, uri: str, api_client: "MarklogicApiClient", search_query: Optional[str] = None):
109
109
  """
110
110
  :param uri: For historical reasons this accepts a pseudo-URI which may include leading or trailing slashes.
111
111
 
@@ -117,7 +117,11 @@ class Document:
117
117
  raise DocumentNotFoundError(f"Document {self.uri} does not exist")
118
118
 
119
119
  self.body: DocumentBody = DocumentBody(
120
- xml_bytestring=self.api_client.get_judgment_xml_bytestring(self.uri, show_unpublished=True),
120
+ xml_bytestring=self.api_client.get_judgment_xml_bytestring(
121
+ self.uri,
122
+ show_unpublished=True,
123
+ search_query=search_query,
124
+ ),
121
125
  )
122
126
  """ `Document.body` represents the XML of the document itself, without any information such as version tracking or properties. """
123
127
 
@@ -495,3 +499,10 @@ class Document:
495
499
  Is it sensible to reparse this document?
496
500
  """
497
501
  return self.docx_exists()
502
+
503
+ def __getattr__(self, name: str) -> Any:
504
+ warnings.warn(f"{name} no longer exists on Document, using Document.body instead", DeprecationWarning)
505
+ try:
506
+ return getattr(self.body, name)
507
+ except Exception:
508
+ raise AttributeError(f"Neither 'Document' nor 'DocumentBody' objects have an attribute '{name}'")
@@ -1,10 +1,12 @@
1
1
  import datetime
2
+ import os
2
3
  import warnings
3
- from functools import cached_property
4
+ from functools import cache, cached_property
4
5
  from typing import Optional
5
6
 
6
7
  import pytz
7
8
  from ds_caselaw_utils.types import CourtCode
9
+ from saxonche import PySaxonProcessor
8
10
 
9
11
  from caselawclient.models.utilities.dates import parse_string_date_as_utc
10
12
 
@@ -129,6 +131,23 @@ class DocumentBody:
129
131
  def content_as_xml(self) -> str:
130
132
  return self._xml.xml_as_string
131
133
 
134
+ @cache
135
+ def content_as_html(self, image_base_url: Optional[str] = None) -> str:
136
+ """Convert the XML representation of the Document into HTML for rendering."""
137
+
138
+ html_xslt_location = os.path.join(os.path.dirname(os.path.realpath(__file__)), "transforms", "html.xsl")
139
+
140
+ with PySaxonProcessor() as proc:
141
+ xslt_processor = proc.new_xslt30_processor()
142
+ document = proc.parse_xml(xml_text=self._xml.xml_as_string)
143
+
144
+ executable = xslt_processor.compile_stylesheet(stylesheet_file=html_xslt_location)
145
+
146
+ if image_base_url:
147
+ executable.set_parameter("image-base", proc.make_string_value(image_base_url))
148
+
149
+ return str(executable.transform_to_string(xdm_node=document))
150
+
132
151
  @cached_property
133
152
  def failed_to_parse(self) -> bool:
134
153
  """