ds-caselaw-marklogic-api-client 29.1.1__py3-none-any.whl → 30.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ds-caselaw-marklogic-api-client might be problematic. Click here for more details.
- caselawclient/Client.py +27 -4
- caselawclient/factories.py +2 -1
- caselawclient/identifier_resolution.py +11 -2
- caselawclient/models/documents/__init__.py +6 -28
- caselawclient/models/documents/body.py +31 -23
- caselawclient/models/documents/exceptions.py +0 -4
- caselawclient/models/judgments.py +3 -1
- caselawclient/models/neutral_citation_mixin.py +3 -6
- caselawclient/models/press_summaries.py +2 -1
- caselawclient/models/utilities/aws.py +27 -20
- caselawclient/models/utilities/move.py +1 -1
- caselawclient/responses/search_result.py +1 -1
- caselawclient/types.py +24 -0
- caselawclient/xquery/resolve_from_identifier_value.xqy +17 -0
- caselawclient/xquery_type_dicts.py +9 -3
- {ds_caselaw_marklogic_api_client-29.1.1.dist-info → ds_caselaw_marklogic_api_client-30.0.0.dist-info}/METADATA +1 -1
- {ds_caselaw_marklogic_api_client-29.1.1.dist-info → ds_caselaw_marklogic_api_client-30.0.0.dist-info}/RECORD +20 -18
- /caselawclient/xquery/{resolve_from_identifier.xqy → resolve_from_identifier_slug.xqy} +0 -0
- {ds_caselaw_marklogic_api_client-29.1.1.dist-info → ds_caselaw_marklogic_api_client-30.0.0.dist-info}/LICENSE.md +0 -0
- {ds_caselaw_marklogic_api_client-29.1.1.dist-info → ds_caselaw_marklogic_api_client-30.0.0.dist-info}/WHEEL +0 -0
caselawclient/Client.py
CHANGED
|
@@ -25,12 +25,12 @@ from caselawclient.models.documents import (
|
|
|
25
25
|
DOCUMENT_COLLECTION_URI_JUDGMENT,
|
|
26
26
|
DOCUMENT_COLLECTION_URI_PRESS_SUMMARY,
|
|
27
27
|
Document,
|
|
28
|
-
DocumentURIString,
|
|
29
28
|
)
|
|
30
29
|
from caselawclient.models.judgments import Judgment
|
|
31
30
|
from caselawclient.models.press_summaries import PressSummary
|
|
32
31
|
from caselawclient.models.utilities import move
|
|
33
32
|
from caselawclient.search_parameters import SearchParameters
|
|
33
|
+
from caselawclient.types import DocumentURIString
|
|
34
34
|
from caselawclient.xquery_type_dicts import (
|
|
35
35
|
MarkLogicDocumentURIString,
|
|
36
36
|
MarkLogicDocumentVersionURIString,
|
|
@@ -1203,17 +1203,40 @@ class MarklogicApiClient:
|
|
|
1203
1203
|
|
|
1204
1204
|
return results
|
|
1205
1205
|
|
|
1206
|
-
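def resolve_from_identifier(self, identifier_uri: str, published_only: bool = True) -> IdentifierResolutions: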
|
|
1206
|
+
def resolve_from_identifier_slug(self, identifier_uri: str, published_only: bool = True) -> IdentifierResolutions:
|
|
1207
1207
|
"""Given a PUI/EUI url, look up the precomputed slug and return the
|
|
1208
1208
|
MarkLogic document URIs which match that slug. Multiple returns should be anticipated"""
|
|
1209
|
-
vars: query_dicts.
|
|
1209
|
+
vars: query_dicts.ResolveFromIdentifierSlugDict = {
|
|
1210
1210
|
"identifier_uri": DocumentURIString(identifier_uri),
|
|
1211
1211
|
"published_only": int(published_only),
|
|
1212
1212
|
}
|
|
1213
1213
|
raw_results: list[str] = get_multipart_strings_from_marklogic_response(
|
|
1214
1214
|
self._send_to_eval(
|
|
1215
1215
|
vars,
|
|
1216
|
-
"
|
|
1216
|
+
"resolve_from_identifier_slug.xqy",
|
|
1217
|
+
),
|
|
1218
|
+
)
|
|
1219
|
+
return IdentifierResolutions.from_marklogic_output(raw_results)
|
|
1220
|
+
|
|
1221
|
+
def resolve_from_identifier(self, identifier_uri: str, published_only: bool = True) -> IdentifierResolutions:
|
|
1222
|
+
warnings.warn(
|
|
1223
|
+
"resolve_from_identifier deprecated, use resolve_from_identifier_slug instead", DeprecationWarning
|
|
1224
|
+
)
|
|
1225
|
+
return self.resolve_from_identifier(identifier_uri, published_only)
|
|
1226
|
+
|
|
1227
|
+
def resolve_from_identifier_value(
|
|
1228
|
+
self, identifier_value: str, published_only: bool = True
|
|
1229
|
+
) -> IdentifierResolutions:
|
|
1230
|
+
"""Given a PUI/EUI url, look up the precomputed slug and return the
|
|
1231
|
+
MarkLogic document URIs which match that slug. Multiple returns should be anticipated"""
|
|
1232
|
+
vars: query_dicts.ResolveFromIdentifierValueDict = {
|
|
1233
|
+
"identifier_value": identifier_value,
|
|
1234
|
+
"published_only": int(published_only),
|
|
1235
|
+
}
|
|
1236
|
+
raw_results: list[str] = get_multipart_strings_from_marklogic_response(
|
|
1237
|
+
self._send_to_eval(
|
|
1238
|
+
vars,
|
|
1239
|
+
"resolve_from_identifier_value.xqy",
|
|
1217
1240
|
),
|
|
1218
1241
|
)
|
|
1219
1242
|
return IdentifierResolutions.from_marklogic_output(raw_results)
|
caselawclient/factories.py
CHANGED
|
@@ -5,11 +5,12 @@ from unittest.mock import Mock
|
|
|
5
5
|
from typing_extensions import TypeAlias
|
|
6
6
|
|
|
7
7
|
from caselawclient.Client import MarklogicApiClient
|
|
8
|
-
from caselawclient.models.documents import Document, DocumentURIString
|
|
8
|
+
from caselawclient.models.documents import Document
|
|
9
9
|
from caselawclient.models.documents.body import DocumentBody
|
|
10
10
|
from caselawclient.models.judgments import Judgment
|
|
11
11
|
from caselawclient.models.press_summaries import PressSummary
|
|
12
12
|
from caselawclient.responses.search_result import SearchResult, SearchResultMetadata
|
|
13
|
+
from caselawclient.types import DocumentURIString
|
|
13
14
|
|
|
14
15
|
DEFAULT_DOCUMENT_BODY_XML = "<akomantoso>This is some XML of a judgment.</akomantoso>"
|
|
15
16
|
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from typing import NamedTuple
|
|
3
3
|
|
|
4
|
-
from caselawclient.models.documents import DocumentURIString
|
|
4
|
+
from caselawclient.models.identifiers import Identifier
|
|
5
|
+
from caselawclient.models.identifiers.unpacker import IDENTIFIER_NAMESPACE_MAP
|
|
6
|
+
from caselawclient.types import DocumentURIString
|
|
5
7
|
from caselawclient.xquery_type_dicts import MarkLogicDocumentURIString
|
|
6
8
|
|
|
7
9
|
|
|
@@ -12,7 +14,7 @@ class IdentifierResolutions(list["IdentifierResolution"]):
|
|
|
12
14
|
MarkLogic returns a list of dictionaries; IdentifierResolution handles a single dictionary
|
|
13
15
|
which corresponds to a single identifier to MarkLogic document mapping.
|
|
14
16
|
|
|
15
|
-
see `xquery/resolve_from_identifier.xqy` and `resolve_from_identifier` in `Client.py`
|
|
17
|
+
see `xquery/resolve_from_identifier_slug.xqy` and `resolve_from_identifier` in `Client.py`
|
|
16
18
|
"""
|
|
17
19
|
|
|
18
20
|
@staticmethod
|
|
@@ -31,13 +33,20 @@ class IdentifierResolution(NamedTuple):
|
|
|
31
33
|
document_uri: MarkLogicDocumentURIString
|
|
32
34
|
identifier_slug: DocumentURIString
|
|
33
35
|
document_published: bool
|
|
36
|
+
identifier_value: str
|
|
37
|
+
identifier_namespace: str
|
|
38
|
+
identifier_type: type[Identifier]
|
|
34
39
|
|
|
35
40
|
@staticmethod
|
|
36
41
|
def from_marklogic_output(raw_row: str) -> "IdentifierResolution":
|
|
37
42
|
row = json.loads(raw_row)
|
|
43
|
+
identifier_namespace = row["documents.compiled_url_slugs.identifier_namespace"]
|
|
38
44
|
return IdentifierResolution(
|
|
39
45
|
identifier_uuid=row["documents.compiled_url_slugs.identifier_uuid"],
|
|
40
46
|
document_uri=MarkLogicDocumentURIString(row["documents.compiled_url_slugs.document_uri"]),
|
|
41
47
|
identifier_slug=DocumentURIString(row["documents.compiled_url_slugs.identifier_slug"]),
|
|
42
48
|
document_published=row["documents.compiled_url_slugs.document_published"] == "true",
|
|
49
|
+
identifier_value=row["documents.compiled_url_slugs.identifier_value"],
|
|
50
|
+
identifier_namespace=identifier_namespace,
|
|
51
|
+
identifier_type=IDENTIFIER_NAMESPACE_MAP[identifier_namespace],
|
|
43
52
|
)
|
|
@@ -29,11 +29,11 @@ from caselawclient.models.utilities.aws import (
|
|
|
29
29
|
publish_documents,
|
|
30
30
|
request_parse,
|
|
31
31
|
unpublish_documents,
|
|
32
|
-
uri_for_s3,
|
|
33
32
|
)
|
|
33
|
+
from caselawclient.types import DocumentURIString
|
|
34
34
|
|
|
35
35
|
from .body import DocumentBody
|
|
36
|
-
from .exceptions import CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion, InvalidDocumentURIException
|
|
36
|
+
from .exceptions import CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion
|
|
37
37
|
from .statuses import DOCUMENT_STATUS_HOLD, DOCUMENT_STATUS_IN_PROGRESS, DOCUMENT_STATUS_NEW, DOCUMENT_STATUS_PUBLISHED
|
|
38
38
|
|
|
39
39
|
MINIMUM_ENRICHMENT_TIME = datetime.timedelta(minutes=20)
|
|
@@ -50,28 +50,6 @@ if TYPE_CHECKING:
|
|
|
50
50
|
from caselawclient.Client import MarklogicApiClient
|
|
51
51
|
|
|
52
52
|
|
|
53
|
-
class DocumentURIString(str):
|
|
54
|
-
"""
|
|
55
|
-
This class checks that the string is actually a valid Document URI on creation. It does _not_ manipulate the string.
|
|
56
|
-
"""
|
|
57
|
-
|
|
58
|
-
def __new__(cls, content: str) -> "DocumentURIString":
|
|
59
|
-
# Check that the URI doesn't begin or end with a slash
|
|
60
|
-
if content[0] == "/" or content[-1] == "/":
|
|
61
|
-
raise InvalidDocumentURIException(
|
|
62
|
-
f'"{content}" is not a valid document URI; URIs cannot begin or end with slashes.'
|
|
63
|
-
)
|
|
64
|
-
|
|
65
|
-
# Check that the URI doesn't contain a full stop
|
|
66
|
-
if "." in content:
|
|
67
|
-
raise InvalidDocumentURIException(
|
|
68
|
-
f'"{content}" is not a valid document URI; URIs cannot contain full stops.'
|
|
69
|
-
)
|
|
70
|
-
|
|
71
|
-
# If everything is good, return as usual
|
|
72
|
-
return str.__new__(cls, content)
|
|
73
|
-
|
|
74
|
-
|
|
75
53
|
class Document:
|
|
76
54
|
"""
|
|
77
55
|
A base class from which all other document types are extensions. This class includes the essential methods for
|
|
@@ -216,11 +194,11 @@ class Document:
|
|
|
216
194
|
|
|
217
195
|
@property
|
|
218
196
|
def docx_url(self) -> str:
|
|
219
|
-
return generate_docx_url(
|
|
197
|
+
return generate_docx_url(self.uri)
|
|
220
198
|
|
|
221
199
|
@property
|
|
222
200
|
def pdf_url(self) -> str:
|
|
223
|
-
return generate_pdf_url(
|
|
201
|
+
return generate_pdf_url(self.uri)
|
|
224
202
|
|
|
225
203
|
@cached_property
|
|
226
204
|
def assigned_to(self) -> str:
|
|
@@ -438,7 +416,7 @@ class Document:
|
|
|
438
416
|
self.identifiers.add(document_fclid)
|
|
439
417
|
self.save_identifiers()
|
|
440
418
|
|
|
441
|
-
publish_documents(
|
|
419
|
+
publish_documents(self.uri)
|
|
442
420
|
self.api_client.set_published(self.uri, True)
|
|
443
421
|
announce_document_event(
|
|
444
422
|
uri=self.uri,
|
|
@@ -448,7 +426,7 @@ class Document:
|
|
|
448
426
|
|
|
449
427
|
def unpublish(self) -> None:
|
|
450
428
|
self.api_client.break_checkout(self.uri)
|
|
451
|
-
unpublish_documents(
|
|
429
|
+
unpublish_documents(self.uri)
|
|
452
430
|
self.api_client.set_published(self.uri, False)
|
|
453
431
|
announce_document_event(
|
|
454
432
|
uri=self.uri,
|
|
@@ -3,6 +3,7 @@ import os
|
|
|
3
3
|
import warnings
|
|
4
4
|
from functools import cache, cached_property
|
|
5
5
|
from typing import Optional
|
|
6
|
+
from xml.etree.ElementTree import Element
|
|
6
7
|
|
|
7
8
|
import pytz
|
|
8
9
|
from ds_caselaw_utils.types import CourtCode
|
|
@@ -12,6 +13,11 @@ from caselawclient.models.utilities.dates import parse_string_date_as_utc
|
|
|
12
13
|
|
|
13
14
|
from .xml import XML
|
|
14
15
|
|
|
16
|
+
DEFAULT_NAMESPACES = {
|
|
17
|
+
"uk": "https://caselaw.nationalarchives.gov.uk/akn",
|
|
18
|
+
"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
|
|
19
|
+
}
|
|
20
|
+
|
|
15
21
|
|
|
16
22
|
class UnparsableDate(Warning):
|
|
17
23
|
pass
|
|
@@ -26,35 +32,25 @@ class DocumentBody:
|
|
|
26
32
|
self._xml = XML(xml_bytestring=xml_bytestring)
|
|
27
33
|
""" This is an instance of the `Document.XML` class for manipulation of the XML document itself. """
|
|
28
34
|
|
|
29
|
-
def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str]) -> str:
|
|
35
|
+
def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> str:
|
|
30
36
|
return self._xml.get_xpath_match_string(xpath, namespaces)
|
|
31
37
|
|
|
38
|
+
def get_xpath_match_strings(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[str]:
|
|
39
|
+
return self._xml.get_xpath_match_strings(xpath, namespaces)
|
|
40
|
+
|
|
32
41
|
@cached_property
|
|
33
42
|
def name(self) -> str:
|
|
34
|
-
return self._xml.get_xpath_match_string(
|
|
35
|
-
"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value"
|
|
36
|
-
{"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
|
|
43
|
+
return self.get_xpath_match_string(
|
|
44
|
+
"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value"
|
|
37
45
|
)
|
|
38
46
|
|
|
39
47
|
@cached_property
|
|
40
48
|
def court(self) -> str:
|
|
41
|
-
return self.
|
|
42
|
-
"/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()",
|
|
43
|
-
{
|
|
44
|
-
"uk": "https://caselaw.nationalarchives.gov.uk/akn",
|
|
45
|
-
"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
|
|
46
|
-
},
|
|
47
|
-
)
|
|
49
|
+
return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()")
|
|
48
50
|
|
|
49
51
|
@cached_property
|
|
50
52
|
def jurisdiction(self) -> str:
|
|
51
|
-
return self.
|
|
52
|
-
"/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()",
|
|
53
|
-
{
|
|
54
|
-
"uk": "https://caselaw.nationalarchives.gov.uk/akn",
|
|
55
|
-
"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
|
|
56
|
-
},
|
|
57
|
-
)
|
|
53
|
+
return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()")
|
|
58
54
|
|
|
59
55
|
@property
|
|
60
56
|
def court_and_jurisdiction_identifier_string(self) -> CourtCode:
|
|
@@ -64,9 +60,8 @@ class DocumentBody:
|
|
|
64
60
|
|
|
65
61
|
@cached_property
|
|
66
62
|
def document_date_as_string(self) -> str:
|
|
67
|
-
return self.
|
|
63
|
+
return self.get_xpath_match_string(
|
|
68
64
|
"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRdate/@date",
|
|
69
|
-
{"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
|
|
70
65
|
)
|
|
71
66
|
|
|
72
67
|
@cached_property
|
|
@@ -90,9 +85,8 @@ class DocumentBody:
|
|
|
90
85
|
name: Optional[str] = None,
|
|
91
86
|
) -> list[datetime.datetime]:
|
|
92
87
|
name_filter = f"[@name='{name}']" if name else ""
|
|
93
|
-
iso_datetimes = self._xml.get_xpath_match_strings(
|
|
88
|
+
iso_datetimes = self.get_xpath_match_strings(
|
|
94
89
|
f"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRManifestation/akn:FRBRdate{name_filter}/@date",
|
|
95
|
-
{"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
|
|
96
90
|
)
|
|
97
91
|
|
|
98
92
|
return [parse_string_date_as_utc(event, pytz.UTC) for event in iso_datetimes]
|
|
@@ -130,9 +124,23 @@ class DocumentBody:
|
|
|
130
124
|
def content_as_xml(self) -> str:
|
|
131
125
|
return self._xml.xml_as_string
|
|
132
126
|
|
|
127
|
+
@cached_property
|
|
128
|
+
def has_content(self) -> bool:
|
|
129
|
+
"""If we do not have a word document, the XML will not contain
|
|
130
|
+
the contents of the judgment, but will contain a preamble."""
|
|
131
|
+
|
|
132
|
+
def stripped_tag_text(tag: Element) -> str:
|
|
133
|
+
return "".join(tag.itertext()).strip()
|
|
134
|
+
|
|
135
|
+
header = self._xml.xml_as_tree.xpath("//akn:header", namespaces=DEFAULT_NAMESPACES)[0]
|
|
136
|
+
content = self._xml.xml_as_tree.xpath("//akn:judgmentBody", namespaces=DEFAULT_NAMESPACES)[0]
|
|
137
|
+
return not (stripped_tag_text(header) == "" and stripped_tag_text(content) == "")
|
|
138
|
+
|
|
133
139
|
@cache
|
|
134
|
-
def content_as_html(self, image_base_url: Optional[str] = None) -> str:
|
|
140
|
+
def content_as_html(self, image_base_url: Optional[str] = None) -> Optional[str]:
|
|
135
141
|
"""Convert the XML representation of the Document into HTML for rendering."""
|
|
142
|
+
if not self.has_content:
|
|
143
|
+
return None
|
|
136
144
|
|
|
137
145
|
html_xslt_location = os.path.join(os.path.dirname(os.path.realpath(__file__)), "transforms", "html.xsl")
|
|
138
146
|
|
|
@@ -10,7 +10,9 @@ from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
11
|
from caselawclient.models.press_summaries import PressSummary
|
|
12
12
|
|
|
13
|
-
from .documents import Document, DocumentURIString
|
|
13
|
+
from caselawclient.types import DocumentURIString
|
|
14
|
+
|
|
15
|
+
from .documents import Document
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
class Judgment(NeutralCitationMixin, Document):
|
|
@@ -23,11 +23,6 @@ class NeutralCitationMixin(ABC):
|
|
|
23
23
|
|
|
24
24
|
def __init__(self, document_noun: str, *args: Any, **kwargs: Any) -> None:
|
|
25
25
|
self.attributes_to_validate: list[tuple[str, bool, str]] = self.attributes_to_validate + [
|
|
26
|
-
(
|
|
27
|
-
"has_ncn",
|
|
28
|
-
True,
|
|
29
|
-
f"This {document_noun} has no neutral citation number",
|
|
30
|
-
),
|
|
31
26
|
(
|
|
32
27
|
"has_valid_ncn",
|
|
33
28
|
True,
|
|
@@ -50,4 +45,6 @@ class NeutralCitationMixin(ABC):
|
|
|
50
45
|
@cached_property
|
|
51
46
|
@deprecated("Legacy usage of NCNs is deprecated; you should be moving to the Identifiers framework")
|
|
52
47
|
def has_valid_ncn(self) -> bool:
|
|
53
|
-
|
|
48
|
+
if self.neutral_citation is None:
|
|
49
|
+
return True
|
|
50
|
+
return neutral_url(self.neutral_citation) is not None
|
|
@@ -8,8 +8,9 @@ from ds_caselaw_utils.types import NeutralCitationString
|
|
|
8
8
|
|
|
9
9
|
from caselawclient.errors import DocumentNotFoundError
|
|
10
10
|
from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
|
|
11
|
+
from caselawclient.types import DocumentURIString
|
|
11
12
|
|
|
12
|
-
from .documents import Document, DocumentURIString
|
|
13
|
+
from .documents import Document
|
|
13
14
|
|
|
14
15
|
if TYPE_CHECKING:
|
|
15
16
|
from caselawclient.models.judgments import Judgment
|
|
@@ -13,9 +13,18 @@ from mypy_boto3_sns.client import SNSClient
|
|
|
13
13
|
from mypy_boto3_sns.type_defs import MessageAttributeValueTypeDef
|
|
14
14
|
from typing_extensions import NotRequired
|
|
15
15
|
|
|
16
|
+
from caselawclient.types import DocumentURIString
|
|
17
|
+
|
|
16
18
|
env = environ.Env()
|
|
17
19
|
|
|
18
20
|
|
|
21
|
+
class S3PrefixString(str):
|
|
22
|
+
def __new__(cls, content: str) -> "S3PrefixString":
|
|
23
|
+
if content[-1] != "/":
|
|
24
|
+
raise RuntimeError("S3 Prefixes must end in / so they behave like directories")
|
|
25
|
+
return str.__new__(cls, content)
|
|
26
|
+
|
|
27
|
+
|
|
19
28
|
class ParserInstructionsMetadataDict(TypedDict):
|
|
20
29
|
name: Optional[str]
|
|
21
30
|
cite: Optional[str]
|
|
@@ -58,8 +67,9 @@ def create_sns_client() -> SNSClient:
|
|
|
58
67
|
return create_aws_client("sns")
|
|
59
68
|
|
|
60
69
|
|
|
61
|
-
def uri_for_s3(uri:
|
|
62
|
-
|
|
70
|
+
def uri_for_s3(uri: DocumentURIString) -> S3PrefixString:
|
|
71
|
+
"""An S3 Prefix must end with / to avoid uksc/2004/1 matching uksc/2004/1000"""
|
|
72
|
+
return S3PrefixString(uri + "/")
|
|
63
73
|
|
|
64
74
|
|
|
65
75
|
def generate_signed_asset_url(key: str) -> str:
|
|
@@ -79,7 +89,7 @@ def generate_signed_asset_url(key: str) -> str:
|
|
|
79
89
|
)
|
|
80
90
|
|
|
81
91
|
|
|
82
|
-
def check_docx_exists(uri: str) -> bool:
|
|
92
|
+
def check_docx_exists(uri: DocumentURIString) -> bool:
|
|
83
93
|
"""Does the docx for a document URI actually exist?"""
|
|
84
94
|
bucket = env("PRIVATE_ASSET_BUCKET", None)
|
|
85
95
|
s3_key = generate_docx_key(uri)
|
|
@@ -93,25 +103,25 @@ def check_docx_exists(uri: str) -> bool:
|
|
|
93
103
|
raise
|
|
94
104
|
|
|
95
105
|
|
|
96
|
-
def generate_docx_key(uri:
|
|
106
|
+
def generate_docx_key(uri: DocumentURIString) -> str:
|
|
97
107
|
"""from a canonical caselaw URI (eat/2022/1) return the S3 key of the associated docx"""
|
|
98
108
|
return f"{uri}/{uri.replace('/', '_')}.docx"
|
|
99
109
|
|
|
100
110
|
|
|
101
|
-
def generate_docx_url(uri:
|
|
111
|
+
def generate_docx_url(uri: DocumentURIString) -> str:
|
|
102
112
|
"""from a canonical caselaw URI (eat/2022/1) return a signed S3 link for the front end"""
|
|
103
113
|
return generate_signed_asset_url(generate_docx_key(uri))
|
|
104
114
|
|
|
105
115
|
|
|
106
|
-
def generate_pdf_url(uri:
|
|
116
|
+
def generate_pdf_url(uri: DocumentURIString) -> str:
|
|
107
117
|
key = f"{uri}/{uri.replace('/', '_')}.pdf"
|
|
108
118
|
|
|
109
119
|
return generate_signed_asset_url(key)
|
|
110
120
|
|
|
111
121
|
|
|
112
|
-
def delete_from_bucket(uri: str, bucket: str) -> None:
|
|
122
|
+
def delete_from_bucket(uri: DocumentURIString, bucket: str) -> None:
|
|
113
123
|
client = create_s3_client()
|
|
114
|
-
response = client.list_objects(Bucket=bucket, Prefix=uri)
|
|
124
|
+
response = client.list_objects(Bucket=bucket, Prefix=uri_for_s3(uri))
|
|
115
125
|
|
|
116
126
|
if response.get("Contents"):
|
|
117
127
|
objects_to_delete: list[ObjectIdentifierTypeDef] = [{"Key": obj["Key"]} for obj in response.get("Contents", [])]
|
|
@@ -123,7 +133,7 @@ def delete_from_bucket(uri: str, bucket: str) -> None:
|
|
|
123
133
|
)
|
|
124
134
|
|
|
125
135
|
|
|
126
|
-
def publish_documents(uri: str) -> None:
|
|
136
|
+
def publish_documents(uri: DocumentURIString) -> None:
|
|
127
137
|
"""
|
|
128
138
|
Copy assets from the unpublished bucket to the published one.
|
|
129
139
|
Don't copy parser logs and package tar gz.
|
|
@@ -134,7 +144,7 @@ def publish_documents(uri: str) -> None:
|
|
|
134
144
|
public_bucket = env("PUBLIC_ASSET_BUCKET")
|
|
135
145
|
private_bucket = env("PRIVATE_ASSET_BUCKET")
|
|
136
146
|
|
|
137
|
-
response = client.list_objects(Bucket=private_bucket, Prefix=uri)
|
|
147
|
+
response = client.list_objects(Bucket=private_bucket, Prefix=uri_for_s3(uri))
|
|
138
148
|
|
|
139
149
|
for result in response.get("Contents", []):
|
|
140
150
|
print(f"Contemplating copying {result!r}")
|
|
@@ -152,15 +162,15 @@ def publish_documents(uri: str) -> None:
|
|
|
152
162
|
)
|
|
153
163
|
|
|
154
164
|
|
|
155
|
-
def unpublish_documents(uri:
|
|
165
|
+
def unpublish_documents(uri: DocumentURIString) -> None:
|
|
156
166
|
delete_from_bucket(uri, env("PUBLIC_ASSET_BUCKET"))
|
|
157
167
|
|
|
158
168
|
|
|
159
|
-
def delete_documents_from_private_bucket(uri:
|
|
169
|
+
def delete_documents_from_private_bucket(uri: DocumentURIString) -> None:
|
|
160
170
|
delete_from_bucket(uri, env("PRIVATE_ASSET_BUCKET"))
|
|
161
171
|
|
|
162
172
|
|
|
163
|
-
def announce_document_event(uri: str, status: str, enrich: bool = False) -> None:
|
|
173
|
+
def announce_document_event(uri: DocumentURIString, status: str, enrich: bool = False) -> None:
|
|
164
174
|
client = create_sns_client()
|
|
165
175
|
|
|
166
176
|
message_attributes: dict[str, MessageAttributeValueTypeDef] = {}
|
|
@@ -186,17 +196,14 @@ def announce_document_event(uri: str, status: str, enrich: bool = False) -> None
|
|
|
186
196
|
)
|
|
187
197
|
|
|
188
198
|
|
|
189
|
-
def copy_assets(old_uri: str, new_uri: str) -> None:
|
|
199
|
+
def copy_assets(old_uri: DocumentURIString, new_uri: DocumentURIString) -> None:
|
|
190
200
|
"""
|
|
191
201
|
Copy *unpublished* assets from one path to another,
|
|
192
202
|
renaming DOCX and PDF files as appropriate.
|
|
193
203
|
"""
|
|
194
204
|
client = create_s3_client()
|
|
195
205
|
bucket = env("PRIVATE_ASSET_BUCKET")
|
|
196
|
-
|
|
197
|
-
new_uri = uri_for_s3(new_uri)
|
|
198
|
-
|
|
199
|
-
response = client.list_objects(Bucket=bucket, Prefix=old_uri)
|
|
206
|
+
response = client.list_objects(Bucket=bucket, Prefix=uri_for_s3(old_uri))
|
|
200
207
|
|
|
201
208
|
for result in response.get("Contents", []):
|
|
202
209
|
old_key = str(result["Key"])
|
|
@@ -212,7 +219,7 @@ def copy_assets(old_uri: str, new_uri: str) -> None:
|
|
|
212
219
|
)
|
|
213
220
|
|
|
214
221
|
|
|
215
|
-
def build_new_key(old_key: str, new_uri: str) -> str:
|
|
222
|
+
def build_new_key(old_key: str, new_uri: DocumentURIString) -> str:
|
|
216
223
|
"""Ensure that DOCX and PDF filenames are modified to reflect their new home
|
|
217
224
|
as we get the name of the new S3 path"""
|
|
218
225
|
old_filename = old_key.rsplit("/", 1)[-1]
|
|
@@ -224,7 +231,7 @@ def build_new_key(old_key: str, new_uri: str) -> str:
|
|
|
224
231
|
|
|
225
232
|
|
|
226
233
|
def request_parse(
|
|
227
|
-
uri:
|
|
234
|
+
uri: DocumentURIString,
|
|
228
235
|
reference: Optional[str],
|
|
229
236
|
parser_instructions: Optional[ParserInstructionsDict] = None,
|
|
230
237
|
) -> None:
|
|
@@ -4,8 +4,8 @@ import ds_caselaw_utils as caselawutils
|
|
|
4
4
|
from ds_caselaw_utils.types import NeutralCitationString
|
|
5
5
|
|
|
6
6
|
from caselawclient.errors import MarklogicAPIError
|
|
7
|
-
from caselawclient.models.documents import DocumentURIString
|
|
8
7
|
from caselawclient.models.utilities.aws import copy_assets
|
|
8
|
+
from caselawclient.types import DocumentURIString
|
|
9
9
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
11
|
from caselawclient.Client import MarklogicApiClient
|
|
@@ -12,7 +12,7 @@ from ds_caselaw_utils.types import CourtCode, JurisdictionCode
|
|
|
12
12
|
from lxml import etree
|
|
13
13
|
|
|
14
14
|
from caselawclient.Client import MarklogicApiClient
|
|
15
|
-
from caselawclient.models.documents import DocumentURIString
|
|
15
|
+
from caselawclient.types import DocumentURIString
|
|
16
16
|
from caselawclient.xml_helpers import get_xpath_match_string
|
|
17
17
|
|
|
18
18
|
|
caselawclient/types.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
class InvalidDocumentURIException(Exception):
|
|
2
|
+
"""The document URI is not valid."""
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class DocumentURIString(str):
|
|
6
|
+
"""
|
|
7
|
+
This class checks that the string is actually a valid Document URI on creation. It does _not_ manipulate the string.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
def __new__(cls, content: str) -> "DocumentURIString":
|
|
11
|
+
# Check that the URI doesn't begin or end with a slash
|
|
12
|
+
if content[0] == "/" or content[-1] == "/":
|
|
13
|
+
raise InvalidDocumentURIException(
|
|
14
|
+
f'"{content}" is not a valid document URI; URIs cannot begin or end with slashes.'
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# Check that the URI doesn't contain a full stop
|
|
18
|
+
if "." in content:
|
|
19
|
+
raise InvalidDocumentURIException(
|
|
20
|
+
f'"{content}" is not a valid document URI; URIs cannot contain full stops.'
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# If everything is good, return as usual
|
|
24
|
+
return str.__new__(cls, content)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
xquery version "1.0-ml";
|
|
2
|
+
|
|
3
|
+
declare namespace xdmp="http://marklogic.com/xdmp";
|
|
4
|
+
declare variable $identifier_value as xs:string external;
|
|
5
|
+
declare variable $published_only as xs:int? external := 1;
|
|
6
|
+
|
|
7
|
+
let $published_query := if ($published_only) then " AND document_published = 'true'" else ""
|
|
8
|
+
let $query := "SELECT * from compiled_url_slugs WHERE (identifier_value = @value)" || $published_query
|
|
9
|
+
|
|
10
|
+
return xdmp:sql(
|
|
11
|
+
$query,
|
|
12
|
+
"map",
|
|
13
|
+
map:new((
|
|
14
|
+
map:entry("value", $identifier_value)
|
|
15
|
+
))
|
|
16
|
+
)
|
|
17
|
+
|
|
@@ -7,7 +7,7 @@ checks. They are used to enforce appropriately typed variables being passed in t
|
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
from typing import Any, NewType, Optional, TypedDict
|
|
10
|
-
from caselawclient.models.documents import DocumentURIString
|
|
10
|
+
from caselawclient.types import DocumentURIString
|
|
11
11
|
|
|
12
12
|
MarkLogicDocumentURIString = NewType("MarkLogicDocumentURIString", str)
|
|
13
13
|
MarkLogicDocumentVersionURIString = NewType("MarkLogicDocumentVersionURIString", MarkLogicDocumentURIString)
|
|
@@ -141,12 +141,18 @@ class ListJudgmentVersionsDict(MarkLogicAPIDict):
|
|
|
141
141
|
uri: MarkLogicDocumentURIString
|
|
142
142
|
|
|
143
143
|
|
|
144
|
-
# resolve_from_identifier.xqy
|
|
145
|
-
class
|
|
144
|
+
# resolve_from_identifier_slug.xqy
|
|
145
|
+
class ResolveFromIdentifierSlugDict(MarkLogicAPIDict):
|
|
146
146
|
identifier_uri: DocumentURIString
|
|
147
147
|
published_only: Optional[int]
|
|
148
148
|
|
|
149
149
|
|
|
150
|
+
# resolve_from_identifier_value.xqy
|
|
151
|
+
class ResolveFromIdentifierValueDict(MarkLogicAPIDict):
|
|
152
|
+
identifier_value: str
|
|
153
|
+
published_only: Optional[int]
|
|
154
|
+
|
|
155
|
+
|
|
150
156
|
# set_boolean_property.xqy
|
|
151
157
|
class SetBooleanPropertyDict(MarkLogicAPIDict):
|
|
152
158
|
name: str
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ds-caselaw-marklogic-api-client
|
|
3
|
-
Version: 29.1.1
|
|
3
|
+
Version: 30.0.0
|
|
4
4
|
Summary: An API client for interacting with the underlying data in Find Caselaw.
|
|
5
5
|
Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
|
|
6
6
|
Keywords: national archives,caselaw
|
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
caselawclient/Client.py,sha256=
|
|
1
|
+
caselawclient/Client.py,sha256=JBtcmyuY8tHm1pj62BkmtLSjsZ825ver9sefTLhltIc,45005
|
|
2
2
|
caselawclient/__init__.py,sha256=DY-caubLDQWWingSdsBWgovDNXh8KcnkI6kwz08eIFk,612
|
|
3
3
|
caselawclient/client_helpers/__init__.py,sha256=fyDNKCdrTb2N0Ks23YDhmvlXKfLTHnYQCXhnZb-QQbg,3832
|
|
4
4
|
caselawclient/client_helpers/search_helpers.py,sha256=R99HyRLeYHgsw2L3DOidEqlKLLvs6Tga5rKTuWQViig,1525
|
|
5
5
|
caselawclient/content_hash.py,sha256=0cPC4OoABq0SC2wYFX9-24DodNigeOqksDxgxQH_hUA,2221
|
|
6
6
|
caselawclient/errors.py,sha256=JC16fEGq_MRJX-_KFzfINCV2Cqx8o6OWOt3C16rQd84,3142
|
|
7
|
-
caselawclient/factories.py,sha256=
|
|
8
|
-
caselawclient/identifier_resolution.py,sha256=
|
|
7
|
+
caselawclient/factories.py,sha256=yJmecrJNmzvI0_gJZFrpiONI6qt2jTFId7cXCA68-iY,4503
|
|
8
|
+
caselawclient/identifier_resolution.py,sha256=pqapUH8oiZF3ie-s_CI0hvZwH__JVcjJ4VxkpBxswmA,2354
|
|
9
9
|
caselawclient/models/__init__.py,sha256=kd23EUpvaC7aLHdgk8farqKAQEx3lf7RvNT2jEatvlg,68
|
|
10
|
-
caselawclient/models/documents/__init__.py,sha256=
|
|
11
|
-
caselawclient/models/documents/body.py,sha256=
|
|
12
|
-
caselawclient/models/documents/exceptions.py,sha256=
|
|
10
|
+
caselawclient/models/documents/__init__.py,sha256=SKYgOOpO4i-lhTWSB16eI6052GEh5ZlqvK3Dggd3sOg,18644
|
|
11
|
+
caselawclient/models/documents/body.py,sha256=2rhNzCsXU13n4nw8m_GU2f_FyYGE8wUYOecBqFmZFHo,5999
|
|
12
|
+
caselawclient/models/documents/exceptions.py,sha256=Mz1P8uNqf5w6uLnRwJt6xK7efsVqtd5VA-WXUUH7QLk,285
|
|
13
13
|
caselawclient/models/documents/statuses.py,sha256=Cp4dTQmJOtsU41EJcxy5dV1841pGD2PNWH0VrkDEv4Q,579
|
|
14
14
|
caselawclient/models/documents/transforms/html.xsl,sha256=oSSO-IBX4qLiSWexQYmWJfGNevF09aCBx4D1NYqXxpo,38322
|
|
15
15
|
caselawclient/models/documents/xml.py,sha256=HlmPb63lLMnySSOLP4iexcAyQiLByKBZtTd25f8sY8M,1268
|
|
@@ -18,19 +18,20 @@ caselawclient/models/identifiers/fclid.py,sha256=pTO586ra0sr4DbjHSxuI8UlxfNXLm9n
|
|
|
18
18
|
caselawclient/models/identifiers/neutral_citation.py,sha256=3Jw1_-NmGfGmrWGFSzLdTHBYHIHq4tPkF8U7Jba-jGo,1848
|
|
19
19
|
caselawclient/models/identifiers/press_summary_ncn.py,sha256=r55-qgi9LDnGxY8vTKijzotGknA6mNLpu55QQTV8Lxo,652
|
|
20
20
|
caselawclient/models/identifiers/unpacker.py,sha256=xvp480QESbN36NEc6qeo-orqOBq6WchnLI7thY7A1qs,2156
|
|
21
|
-
caselawclient/models/judgments.py,sha256=
|
|
22
|
-
caselawclient/models/neutral_citation_mixin.py,sha256=
|
|
23
|
-
caselawclient/models/press_summaries.py,sha256=
|
|
21
|
+
caselawclient/models/judgments.py,sha256=hYPmzWcxS_Pi87vWEezLI5geQqm1tRh_HtGqHpm49Zg,1900
|
|
22
|
+
caselawclient/models/neutral_citation_mixin.py,sha256=jAac3PPuWyPdj9N-n-U_JfwkbgbSIXaqFVQahfu95do,2086
|
|
23
|
+
caselawclient/models/press_summaries.py,sha256=f0Qyv5_7K8tWv-HVGv2QHlS4WMh5Dh4ZhbYJOJYD9lw,1867
|
|
24
24
|
caselawclient/models/utilities/__init__.py,sha256=u3yIhbTjFQ1JJyAm5wsMEBswWl4t6Z7UMORF5FqC2xQ,1257
|
|
25
|
-
caselawclient/models/utilities/aws.py,sha256=
|
|
25
|
+
caselawclient/models/utilities/aws.py,sha256=E4nFcNC2xxPUv0Xkfi2XTO3FyIH6jaAy9pgsvR48Eg8,8759
|
|
26
26
|
caselawclient/models/utilities/dates.py,sha256=WwORxVjUHM1ZFcBF6Qtwo3Cj0sATsnSECkUZ6ls1N1Q,492
|
|
27
|
-
caselawclient/models/utilities/move.py,sha256=
|
|
27
|
+
caselawclient/models/utilities/move.py,sha256=MXdUqkSiyqRb8YKs_66B6ICWn8EWM6DiJV95fuJO1Us,3610
|
|
28
28
|
caselawclient/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
29
|
caselawclient/responses/__init__.py,sha256=2-5NJn_PXPTje_W4dHeHYaNRN6vXK4UcB9eLLNUAKa4,67
|
|
30
30
|
caselawclient/responses/search_response.py,sha256=Z76Zj4VvM-EV_vdiehv2-Jfkr9HZD3SvCTlRrUB_cyE,1951
|
|
31
|
-
caselawclient/responses/search_result.py,sha256=
|
|
31
|
+
caselawclient/responses/search_result.py,sha256=hotJHJ9wQusjb4PjZm1DDZv2HEsXWTtZU5TOTj2T0rw,8205
|
|
32
32
|
caselawclient/responses/xsl/search_match.xsl,sha256=4Sv--MrwBd7J48E9aI7jlFSXGlNi4dBqgzJ3bdMJ_ZU,1018
|
|
33
33
|
caselawclient/search_parameters.py,sha256=nR-UC1aWZbdXzXBrVDaHECU4Ro8Zi4JZATtgrpAVsKY,3342
|
|
34
|
+
caselawclient/types.py,sha256=vVOK78bFsnHXdOGx1899biR2QiCSVNBKoDbziJPCb68,920
|
|
34
35
|
caselawclient/xml_helpers.py,sha256=FEtE8gxaEZmcgua-Xu8awPmiOm9K58OSabEYVGpiVEY,493
|
|
35
36
|
caselawclient/xquery/break_judgment_checkout.xqy,sha256=rISzoBKxQKrP5ZRdCSoRqOXW8T_NDBSZRFjOXo_H3ns,220
|
|
36
37
|
caselawclient/xquery/checkin_judgment.xqy,sha256=QeGqO3kL-q0UrjopCVU0lCbkwbyoc5SuNLYFAIbbyMg,197
|
|
@@ -59,7 +60,8 @@ caselawclient/xquery/get_version_annotation.xqy,sha256=pFDMGA9SxI59iUPaoAeUsq23k
|
|
|
59
60
|
caselawclient/xquery/get_version_created.xqy,sha256=bRweaXFtwMBNzL16SlOdiOxHkbqNUwpwDHLxpZYVCh0,250
|
|
60
61
|
caselawclient/xquery/insert_document.xqy,sha256=iP2xTaLGa-u6X9KfS1yJ6yPCKQUWQFYdEW1S4YcMY7w,531
|
|
61
62
|
caselawclient/xquery/list_judgment_versions.xqy,sha256=WShga8igeD21hSLfVSvCOiDMPDhNH6KGf1OW6G0SAkY,190
|
|
62
|
-
caselawclient/xquery/
|
|
63
|
+
caselawclient/xquery/resolve_from_identifier_slug.xqy,sha256=Fa-RSw9ZwD__BmT5LLJ0J0HcDstDbedkEccv45M3L4g,484
|
|
64
|
+
caselawclient/xquery/resolve_from_identifier_value.xqy,sha256=7uP3DnRi67qSp0aUhW6Cv_GA8BQGw6GuvtAghjrT7Z4,493
|
|
63
65
|
caselawclient/xquery/set_boolean_property.xqy,sha256=8Vg3yDWqeDynUJQHw2OF4daDIKTnp8ARol1_OCqY0Dk,355
|
|
64
66
|
caselawclient/xquery/set_metadata_citation.xqy,sha256=ImwijXowvOCiH_br_LepnKsEpys9tg4Cf3uz6MoC5-c,659
|
|
65
67
|
caselawclient/xquery/set_metadata_court.xqy,sha256=xQGR3e4pdJuDPMlzdAdzrBDSeQbEFiLVIm2z_KQI_Ds,996
|
|
@@ -77,8 +79,8 @@ caselawclient/xquery/validate_all_documents.xqy,sha256=z_0YEXmRcZ-FaJM0ouKiTjdI4
|
|
|
77
79
|
caselawclient/xquery/validate_document.xqy,sha256=PgaDcnqCRJPIVqfmWsNlXmCLNKd21qkJrvY1RtNP7eA,140
|
|
78
80
|
caselawclient/xquery/xslt.xqy,sha256=w57wNijH3dkwHkpKeAxqjlghVflQwo8cq6jS_sm-erM,199
|
|
79
81
|
caselawclient/xquery/xslt_transform.xqy,sha256=smyFFxqmtkuOzBd2l7uw6K2oAsYctudrP8omdv_XNAM,2463
|
|
80
|
-
caselawclient/xquery_type_dicts.py,sha256=
|
|
81
|
-
ds_caselaw_marklogic_api_client-
|
|
82
|
-
ds_caselaw_marklogic_api_client-
|
|
83
|
-
ds_caselaw_marklogic_api_client-
|
|
84
|
-
ds_caselaw_marklogic_api_client-
|
|
82
|
+
caselawclient/xquery_type_dicts.py,sha256=qZ_bUiceoHw8c02FAFWe75zdpmK8GYUJzi2q-bc1NXA,6266
|
|
83
|
+
ds_caselaw_marklogic_api_client-30.0.0.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
|
|
84
|
+
ds_caselaw_marklogic_api_client-30.0.0.dist-info/METADATA,sha256=6DnjxKcXIc01ysu22YKZBC5mek5oQtmxiRnTEljxzN0,4264
|
|
85
|
+
ds_caselaw_marklogic_api_client-30.0.0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
|
86
|
+
ds_caselaw_marklogic_api_client-30.0.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|