ds-caselaw-marklogic-api-client 27.0.1__py3-none-any.whl → 27.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- caselawclient/Client.py +7 -1
- caselawclient/factories.py +11 -41
- caselawclient/models/documents/__init__.py +13 -2
- caselawclient/models/documents/body.py +20 -1
- caselawclient/models/documents/transforms/html.xsl +1070 -0
- caselawclient/xquery/get_judgment.xqy +47 -2
- caselawclient/xquery_type_dicts.py +1 -0
- {ds_caselaw_marklogic_api_client-27.0.1.dist-info → ds_caselaw_marklogic_api_client-27.2.0.dist-info}/METADATA +2 -1
- {ds_caselaw_marklogic_api_client-27.0.1.dist-info → ds_caselaw_marklogic_api_client-27.2.0.dist-info}/RECORD +11 -10
- {ds_caselaw_marklogic_api_client-27.0.1.dist-info → ds_caselaw_marklogic_api_client-27.2.0.dist-info}/LICENSE.md +0 -0
- {ds_caselaw_marklogic_api_client-27.0.1.dist-info → ds_caselaw_marklogic_api_client-27.2.0.dist-info}/WHEEL +0 -0
caselawclient/Client.py
CHANGED
|
@@ -403,6 +403,7 @@ class MarklogicApiClient:
|
|
|
403
403
|
judgment_uri: DocumentURIString,
|
|
404
404
|
version_uri: Optional[DocumentURIString] = None,
|
|
405
405
|
show_unpublished: bool = False,
|
|
406
|
+
search_query: Optional[str] = None,
|
|
406
407
|
) -> bytes:
|
|
407
408
|
marklogic_document_uri = self._format_uri_for_marklogic(judgment_uri)
|
|
408
409
|
marklogic_document_version_uri = (
|
|
@@ -418,6 +419,7 @@ class MarklogicApiClient:
|
|
|
418
419
|
"uri": marklogic_document_uri,
|
|
419
420
|
"version_uri": marklogic_document_version_uri,
|
|
420
421
|
"show_unpublished": show_unpublished,
|
|
422
|
+
"search_query": search_query,
|
|
421
423
|
}
|
|
422
424
|
|
|
423
425
|
response = self._eval_as_bytes(vars, "get_judgment.xqy")
|
|
@@ -433,11 +435,13 @@ class MarklogicApiClient:
|
|
|
433
435
|
judgment_uri: DocumentURIString,
|
|
434
436
|
version_uri: Optional[DocumentURIString] = None,
|
|
435
437
|
show_unpublished: bool = False,
|
|
438
|
+
search_query: Optional[str] = None,
|
|
436
439
|
) -> str:
|
|
437
440
|
return self.get_judgment_xml_bytestring(
|
|
438
441
|
judgment_uri,
|
|
439
442
|
version_uri,
|
|
440
443
|
show_unpublished,
|
|
444
|
+
search_query=search_query,
|
|
441
445
|
).decode(encoding="utf-8")
|
|
442
446
|
|
|
443
447
|
def set_document_name(
|
|
@@ -634,12 +638,14 @@ class MarklogicApiClient:
|
|
|
634
638
|
judgment_uri: DocumentURIString,
|
|
635
639
|
annotation: str = "",
|
|
636
640
|
expires_at_midnight: bool = False,
|
|
641
|
+
timeout_seconds: int = -1,
|
|
637
642
|
) -> requests.Response:
|
|
643
|
+
"""If timeout_seconds is -1, the lock never times out"""
|
|
638
644
|
uri = self._format_uri_for_marklogic(judgment_uri)
|
|
639
645
|
vars: query_dicts.CheckoutJudgmentDict = {
|
|
640
646
|
"uri": uri,
|
|
641
647
|
"annotation": annotation,
|
|
642
|
-
"timeout": -1,
|
|
648
|
+
"timeout": timeout_seconds,
|
|
643
649
|
}
|
|
644
650
|
|
|
645
651
|
if expires_at_midnight:
|
caselawclient/factories.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import datetime
|
|
2
|
-
from typing import Any, Optional, cast
|
|
2
|
+
from typing import Any, Optional
|
|
3
3
|
from unittest.mock import Mock
|
|
4
4
|
|
|
5
5
|
from typing_extensions import TypeAlias
|
|
@@ -75,55 +75,24 @@ class DocumentFactory:
|
|
|
75
75
|
|
|
76
76
|
|
|
77
77
|
class JudgmentFactory(DocumentFactory):
|
|
78
|
-
target_class = Judgment
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
"neutral_citation": "[2023] Test 123",
|
|
83
|
-
"best_human_identifier": "[2023] Test 123",
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
super().__init__()
|
|
87
|
-
|
|
88
|
-
@classmethod
|
|
89
|
-
def build(
|
|
90
|
-
cls,
|
|
91
|
-
uri: str = "test/2023/123",
|
|
92
|
-
html: str = "<p>This is a judgment.</p>",
|
|
93
|
-
api_client: Optional[MarklogicApiClient] = None,
|
|
94
|
-
**kwargs: Any,
|
|
95
|
-
) -> Judgment:
|
|
96
|
-
return cast(Judgment, super().build(uri, html, api_client, **kwargs))
|
|
78
|
+
target_class: TypeAlias = Judgment
|
|
79
|
+
PARAMS_MAP = DocumentFactory.PARAMS_MAP | {
|
|
80
|
+
"neutral_citation": "[2023] Test 123",
|
|
81
|
+
}
|
|
97
82
|
|
|
98
83
|
|
|
99
84
|
class PressSummaryFactory(DocumentFactory):
|
|
100
|
-
target_class = PressSummary
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
"neutral_citation": "[2023] Test 123",
|
|
105
|
-
"best_human_identifier": "[2023] Test 123",
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
super().__init__()
|
|
109
|
-
|
|
110
|
-
@classmethod
|
|
111
|
-
def build(
|
|
112
|
-
cls,
|
|
113
|
-
uri: str = "test/2023/123/press-summary/1",
|
|
114
|
-
html: str = "<p>This is a judgment.</p>",
|
|
115
|
-
api_client: Optional[MarklogicApiClient] = None,
|
|
116
|
-
**kwargs: Any,
|
|
117
|
-
) -> PressSummary:
|
|
118
|
-
return cast(PressSummary, super().build(uri, html, api_client, **kwargs))
|
|
85
|
+
target_class: TypeAlias = PressSummary
|
|
86
|
+
PARAMS_MAP = DocumentFactory.PARAMS_MAP | {
|
|
87
|
+
"neutral_citation": "[2023] Test 123",
|
|
88
|
+
}
|
|
119
89
|
|
|
120
90
|
|
|
121
91
|
class SimpleFactory:
|
|
92
|
+
target_class: TypeAlias = object
|
|
122
93
|
# "name_of_attribute": "default value"
|
|
123
94
|
PARAMS_MAP: dict[str, Any]
|
|
124
95
|
|
|
125
|
-
target_class: TypeAlias = object
|
|
126
|
-
|
|
127
96
|
@classmethod
|
|
128
97
|
def build(cls, **kwargs: Any) -> target_class:
|
|
129
98
|
mock_object = Mock(spec=cls.target_class, autospec=True)
|
|
@@ -145,6 +114,7 @@ class SearchResultMetadataFactory(SimpleFactory):
|
|
|
145
114
|
"author_email": "fake.email@gov.invalid",
|
|
146
115
|
"consignment_reference": "TDR-2023-ABC",
|
|
147
116
|
"submission_datetime": datetime.datetime(2023, 2, 3, 9, 12, 34),
|
|
117
|
+
"editor_status": "New",
|
|
148
118
|
}
|
|
149
119
|
|
|
150
120
|
|
|
caselawclient/models/documents/__init__.py
CHANGED
|
@@ -105,7 +105,7 @@ class Document:
|
|
|
105
105
|
Individual document classes should extend this list where necessary to validate document type-specific attributes.
|
|
106
106
|
"""
|
|
107
107
|
|
|
108
|
-
def __init__(self, uri: str, api_client: "MarklogicApiClient"):
|
|
108
|
+
def __init__(self, uri: str, api_client: "MarklogicApiClient", search_query: Optional[str] = None):
|
|
109
109
|
"""
|
|
110
110
|
:param uri: For historical reasons this accepts a pseudo-URI which may include leading or trailing slashes.
|
|
111
111
|
|
|
@@ -117,7 +117,11 @@ class Document:
|
|
|
117
117
|
raise DocumentNotFoundError(f"Document {self.uri} does not exist")
|
|
118
118
|
|
|
119
119
|
self.body: DocumentBody = DocumentBody(
|
|
120
|
-
xml_bytestring=self.api_client.get_judgment_xml_bytestring(self.uri, show_unpublished=True),
|
|
120
|
+
xml_bytestring=self.api_client.get_judgment_xml_bytestring(
|
|
121
|
+
self.uri,
|
|
122
|
+
show_unpublished=True,
|
|
123
|
+
search_query=search_query,
|
|
124
|
+
),
|
|
121
125
|
)
|
|
122
126
|
""" `Document.body` represents the XML of the document itself, without any information such as version tracking or properties. """
|
|
123
127
|
|
|
@@ -495,3 +499,10 @@ class Document:
|
|
|
495
499
|
Is it sensible to reparse this document?
|
|
496
500
|
"""
|
|
497
501
|
return self.docx_exists()
|
|
502
|
+
|
|
503
|
+
def __getattr__(self, name: str) -> Any:
|
|
504
|
+
warnings.warn(f"{name} no longer exists on Document, using Document.body instead", DeprecationWarning)
|
|
505
|
+
try:
|
|
506
|
+
return getattr(self.body, name)
|
|
507
|
+
except Exception:
|
|
508
|
+
raise AttributeError(f"Neither 'Document' nor 'DocumentBody' objects have an attribute '{name}'")
|
|
caselawclient/models/documents/body.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import datetime
|
|
2
|
+
import os
|
|
2
3
|
import warnings
|
|
3
|
-
from functools import cached_property
|
|
4
|
+
from functools import cache, cached_property
|
|
4
5
|
from typing import Optional
|
|
5
6
|
|
|
6
7
|
import pytz
|
|
7
8
|
from ds_caselaw_utils.types import CourtCode
|
|
9
|
+
from saxonche import PySaxonProcessor
|
|
8
10
|
|
|
9
11
|
from caselawclient.models.utilities.dates import parse_string_date_as_utc
|
|
10
12
|
|
|
@@ -129,6 +131,23 @@ class DocumentBody:
|
|
|
129
131
|
def content_as_xml(self) -> str:
|
|
130
132
|
return self._xml.xml_as_string
|
|
131
133
|
|
|
134
|
+
@cache
|
|
135
|
+
def content_as_html(self, image_base_url: Optional[str] = None) -> str:
|
|
136
|
+
"""Convert the XML representation of the Document into HTML for rendering."""
|
|
137
|
+
|
|
138
|
+
html_xslt_location = os.path.join(os.path.dirname(os.path.realpath(__file__)), "transforms", "html.xsl")
|
|
139
|
+
|
|
140
|
+
with PySaxonProcessor() as proc:
|
|
141
|
+
xslt_processor = proc.new_xslt30_processor()
|
|
142
|
+
document = proc.parse_xml(xml_text=self._xml.xml_as_string)
|
|
143
|
+
|
|
144
|
+
executable = xslt_processor.compile_stylesheet(stylesheet_file=html_xslt_location)
|
|
145
|
+
|
|
146
|
+
if image_base_url:
|
|
147
|
+
executable.set_parameter("image-base", proc.make_string_value(image_base_url))
|
|
148
|
+
|
|
149
|
+
return str(executable.transform_to_string(xdm_node=document))
|
|
150
|
+
|
|
132
151
|
@cached_property
|
|
133
152
|
def failed_to_parse(self) -> bool:
|
|
134
153
|
"""
|