ds-caselaw-marklogic-api-client 29.1.1__py3-none-any.whl → 30.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ds-caselaw-marklogic-api-client might be problematic. Click here for more details.

caselawclient/Client.py CHANGED
@@ -25,12 +25,12 @@ from caselawclient.models.documents import (
25
25
  DOCUMENT_COLLECTION_URI_JUDGMENT,
26
26
  DOCUMENT_COLLECTION_URI_PRESS_SUMMARY,
27
27
  Document,
28
- DocumentURIString,
29
28
  )
30
29
  from caselawclient.models.judgments import Judgment
31
30
  from caselawclient.models.press_summaries import PressSummary
32
31
  from caselawclient.models.utilities import move
33
32
  from caselawclient.search_parameters import SearchParameters
33
+ from caselawclient.types import DocumentURIString
34
34
  from caselawclient.xquery_type_dicts import (
35
35
  MarkLogicDocumentURIString,
36
36
  MarkLogicDocumentVersionURIString,
@@ -1203,17 +1203,40 @@ class MarklogicApiClient:
1203
1203
 
1204
1204
  return results
1205
1205
 
1206
- def resolve_from_identifier(self, identifier_uri: str, published_only: bool = True) -> IdentifierResolutions:
1206
+ def resolve_from_identifier_slug(self, identifier_uri: str, published_only: bool = True) -> IdentifierResolutions:
1207
1207
  """Given a PUI/EUI url, look up the precomputed slug and return the
1208
1208
  MarkLogic document URIs which match that slug. Multiple returns should be anticipated"""
1209
- vars: query_dicts.ResolveFromIdentifierDict = {
1209
+ vars: query_dicts.ResolveFromIdentifierSlugDict = {
1210
1210
  "identifier_uri": DocumentURIString(identifier_uri),
1211
1211
  "published_only": int(published_only),
1212
1212
  }
1213
1213
  raw_results: list[str] = get_multipart_strings_from_marklogic_response(
1214
1214
  self._send_to_eval(
1215
1215
  vars,
1216
- "resolve_from_identifier.xqy",
1216
+ "resolve_from_identifier_slug.xqy",
1217
+ ),
1218
+ )
1219
+ return IdentifierResolutions.from_marklogic_output(raw_results)
1220
+
1221
def resolve_from_identifier(self, identifier_uri: str, published_only: bool = True) -> "IdentifierResolutions":
    """Deprecated alias for `resolve_from_identifier_slug`.

    Emits a `DeprecationWarning`, then forwards `identifier_uri` and `published_only`
    unchanged and returns whatever `resolve_from_identifier_slug` returns.
    """
    warnings.warn(
        "resolve_from_identifier deprecated, use resolve_from_identifier_slug instead",
        DeprecationWarning,
        # Attribute the warning to the caller's line rather than to this wrapper.
        stacklevel=2,
    )
    # Delegate to the renamed method; calling this method again would recurse without end.
    return self.resolve_from_identifier_slug(identifier_uri, published_only)
1226
+
1227
def resolve_from_identifier_value(
    self, identifier_value: str, published_only: bool = True
) -> IdentifierResolutions:
    """Given an identifier value, return the MarkLogic document URIs recorded against that value.

    The value is sent to `resolve_from_identifier_value.xqy` exactly as given; unlike the
    slug lookup it is not wrapped in `DocumentURIString`. `published_only` is sent as 1 or 0.
    Multiple returns should be anticipated."""
    # Named `query_vars` rather than `vars` so the builtin is not shadowed.
    query_vars: query_dicts.ResolveFromIdentifierValueDict = {
        "identifier_value": identifier_value,
        "published_only": int(published_only),
    }
    raw_results: list[str] = get_multipart_strings_from_marklogic_response(
        self._send_to_eval(
            query_vars,
            "resolve_from_identifier_value.xqy",
        ),
    )
    return IdentifierResolutions.from_marklogic_output(raw_results)
@@ -5,11 +5,12 @@ from unittest.mock import Mock
5
5
  from typing_extensions import TypeAlias
6
6
 
7
7
  from caselawclient.Client import MarklogicApiClient
8
- from caselawclient.models.documents import Document, DocumentURIString
8
+ from caselawclient.models.documents import Document
9
9
  from caselawclient.models.documents.body import DocumentBody
10
10
  from caselawclient.models.judgments import Judgment
11
11
  from caselawclient.models.press_summaries import PressSummary
12
12
  from caselawclient.responses.search_result import SearchResult, SearchResultMetadata
13
+ from caselawclient.types import DocumentURIString
13
14
 
14
15
  DEFAULT_DOCUMENT_BODY_XML = "<akomantoso>This is some XML of a judgment.</akomantoso>"
15
16
 
@@ -1,7 +1,9 @@
1
1
  import json
2
2
  from typing import NamedTuple
3
3
 
4
- from caselawclient.models.documents import DocumentURIString
4
+ from caselawclient.models.identifiers import Identifier
5
+ from caselawclient.models.identifiers.unpacker import IDENTIFIER_NAMESPACE_MAP
6
+ from caselawclient.types import DocumentURIString
5
7
  from caselawclient.xquery_type_dicts import MarkLogicDocumentURIString
6
8
 
7
9
 
@@ -12,7 +14,7 @@ class IdentifierResolutions(list["IdentifierResolution"]):
12
14
  MarkLogic returns a list of dictionaries; IdentifierResolution handles a single dictionary
13
15
  which corresponds to a single identifier to MarkLogic document mapping.
14
16
 
15
- see `xquery/resolve_from_identifier.xqy` and `resolve_from_identifier` in `Client.py`
17
+ see `xquery/resolve_from_identifier_slug.xqy` and `resolve_from_identifier_slug` in `Client.py`
16
18
  """
17
19
 
18
20
  @staticmethod
@@ -31,13 +33,20 @@ class IdentifierResolution(NamedTuple):
31
33
  document_uri: MarkLogicDocumentURIString
32
34
  identifier_slug: DocumentURIString
33
35
  document_published: bool
36
+ identifier_value: str
37
+ identifier_namespace: str
38
+ identifier_type: type[Identifier]
34
39
 
35
40
  @staticmethod
36
41
  def from_marklogic_output(raw_row: str) -> "IdentifierResolution":
37
42
  row = json.loads(raw_row)
43
+ identifier_namespace = row["documents.compiled_url_slugs.identifier_namespace"]
38
44
  return IdentifierResolution(
39
45
  identifier_uuid=row["documents.compiled_url_slugs.identifier_uuid"],
40
46
  document_uri=MarkLogicDocumentURIString(row["documents.compiled_url_slugs.document_uri"]),
41
47
  identifier_slug=DocumentURIString(row["documents.compiled_url_slugs.identifier_slug"]),
42
48
  document_published=row["documents.compiled_url_slugs.document_published"] == "true",
49
+ identifier_value=row["documents.compiled_url_slugs.identifier_value"],
50
+ identifier_namespace=identifier_namespace,
51
+ identifier_type=IDENTIFIER_NAMESPACE_MAP[identifier_namespace],
43
52
  )
@@ -29,11 +29,11 @@ from caselawclient.models.utilities.aws import (
29
29
  publish_documents,
30
30
  request_parse,
31
31
  unpublish_documents,
32
- uri_for_s3,
33
32
  )
33
+ from caselawclient.types import DocumentURIString
34
34
 
35
35
  from .body import DocumentBody
36
- from .exceptions import CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion, InvalidDocumentURIException
36
+ from .exceptions import CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion
37
37
  from .statuses import DOCUMENT_STATUS_HOLD, DOCUMENT_STATUS_IN_PROGRESS, DOCUMENT_STATUS_NEW, DOCUMENT_STATUS_PUBLISHED
38
38
 
39
39
  MINIMUM_ENRICHMENT_TIME = datetime.timedelta(minutes=20)
@@ -50,28 +50,6 @@ if TYPE_CHECKING:
50
50
  from caselawclient.Client import MarklogicApiClient
51
51
 
52
52
 
53
- class DocumentURIString(str):
54
- """
55
- This class checks that the string is actually a valid Document URI on creation. It does _not_ manipulate the string.
56
- """
57
-
58
- def __new__(cls, content: str) -> "DocumentURIString":
59
- # Check that the URI doesn't begin or end with a slash
60
- if content[0] == "/" or content[-1] == "/":
61
- raise InvalidDocumentURIException(
62
- f'"{content}" is not a valid document URI; URIs cannot begin or end with slashes.'
63
- )
64
-
65
- # Check that the URI doesn't contain a full stop
66
- if "." in content:
67
- raise InvalidDocumentURIException(
68
- f'"{content}" is not a valid document URI; URIs cannot contain full stops.'
69
- )
70
-
71
- # If everything is good, return as usual
72
- return str.__new__(cls, content)
73
-
74
-
75
53
  class Document:
76
54
  """
77
55
  A base class from which all other document types are extensions. This class includes the essential methods for
@@ -216,11 +194,11 @@ class Document:
216
194
 
217
195
  @property
218
196
  def docx_url(self) -> str:
219
- return generate_docx_url(uri_for_s3(self.uri))
197
+ return generate_docx_url(self.uri)
220
198
 
221
199
  @property
222
200
  def pdf_url(self) -> str:
223
- return generate_pdf_url(uri_for_s3(self.uri))
201
+ return generate_pdf_url(self.uri)
224
202
 
225
203
  @cached_property
226
204
  def assigned_to(self) -> str:
@@ -438,7 +416,7 @@ class Document:
438
416
  self.identifiers.add(document_fclid)
439
417
  self.save_identifiers()
440
418
 
441
- publish_documents(uri_for_s3(self.uri))
419
+ publish_documents(self.uri)
442
420
  self.api_client.set_published(self.uri, True)
443
421
  announce_document_event(
444
422
  uri=self.uri,
@@ -448,7 +426,7 @@ class Document:
448
426
 
449
427
  def unpublish(self) -> None:
450
428
  self.api_client.break_checkout(self.uri)
451
- unpublish_documents(uri_for_s3(self.uri))
429
+ unpublish_documents(self.uri)
452
430
  self.api_client.set_published(self.uri, False)
453
431
  announce_document_event(
454
432
  uri=self.uri,
@@ -3,6 +3,7 @@ import os
3
3
  import warnings
4
4
  from functools import cache, cached_property
5
5
  from typing import Optional
6
+ from xml.etree.ElementTree import Element
6
7
 
7
8
  import pytz
8
9
  from ds_caselaw_utils.types import CourtCode
@@ -12,6 +13,11 @@ from caselawclient.models.utilities.dates import parse_string_date_as_utc
12
13
 
13
14
  from .xml import XML
14
15
 
16
+ DEFAULT_NAMESPACES = {
17
+ "uk": "https://caselaw.nationalarchives.gov.uk/akn",
18
+ "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
19
+ }
20
+
15
21
 
16
22
  class UnparsableDate(Warning):
17
23
  pass
@@ -26,35 +32,25 @@ class DocumentBody:
26
32
  self._xml = XML(xml_bytestring=xml_bytestring)
27
33
  """ This is an instance of the `Document.XML` class for manipulation of the XML document itself. """
28
34
 
29
- def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str]) -> str:
35
+ def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> str:
30
36
  return self._xml.get_xpath_match_string(xpath, namespaces)
31
37
 
38
def get_xpath_match_strings(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[str]:
    """Return all strings matched by `xpath`, as reported by the wrapped XML object.

    `namespaces` maps prefixes used in `xpath` to namespace URIs and defaults to
    `DEFAULT_NAMESPACES`.
    """
    matches = self._xml.get_xpath_match_strings(xpath, namespaces)
    return matches
40
+
32
41
  @cached_property
33
42
  def name(self) -> str:
34
- return self._xml.get_xpath_match_string(
35
- "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value",
36
- {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
43
+ return self.get_xpath_match_string(
44
+ "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value"
37
45
  )
38
46
 
39
47
  @cached_property
40
48
  def court(self) -> str:
41
- return self._xml.get_xpath_match_string(
42
- "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()",
43
- {
44
- "uk": "https://caselaw.nationalarchives.gov.uk/akn",
45
- "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
46
- },
47
- )
49
+ return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()")
48
50
 
49
51
  @cached_property
50
52
  def jurisdiction(self) -> str:
51
- return self._xml.get_xpath_match_string(
52
- "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()",
53
- {
54
- "uk": "https://caselaw.nationalarchives.gov.uk/akn",
55
- "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
56
- },
57
- )
53
+ return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()")
58
54
 
59
55
  @property
60
56
  def court_and_jurisdiction_identifier_string(self) -> CourtCode:
@@ -64,9 +60,8 @@ class DocumentBody:
64
60
 
65
61
  @cached_property
66
62
  def document_date_as_string(self) -> str:
67
- return self._xml.get_xpath_match_string(
63
+ return self.get_xpath_match_string(
68
64
  "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRdate/@date",
69
- {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
70
65
  )
71
66
 
72
67
  @cached_property
@@ -90,9 +85,8 @@ class DocumentBody:
90
85
  name: Optional[str] = None,
91
86
  ) -> list[datetime.datetime]:
92
87
  name_filter = f"[@name='{name}']" if name else ""
93
- iso_datetimes = self._xml.get_xpath_match_strings(
88
+ iso_datetimes = self.get_xpath_match_strings(
94
89
  f"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRManifestation/akn:FRBRdate{name_filter}/@date",
95
- {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
96
90
  )
97
91
 
98
92
  return [parse_string_date_as_utc(event, pytz.UTC) for event in iso_datetimes]
@@ -130,9 +124,23 @@ class DocumentBody:
130
124
  def content_as_xml(self) -> str:
131
125
  return self._xml.xml_as_string
132
126
 
127
@cached_property
def has_content(self) -> bool:
    """Whether the document's header or judgment body contains any text.

    If we do not have a word document, the XML will not contain
    the contents of the judgment, but will contain a preamble.

    Returns False only when both `akn:header` and `akn:judgmentBody` are present and
    both are empty once whitespace is stripped; otherwise returns True.
    """

    def is_blank(xpath: str) -> bool:
        """True when `xpath` matches an element and the first match holds no text."""
        matches = self._xml.xml_as_tree.xpath(xpath, namespaces=DEFAULT_NAMESPACES)
        # A missing element says nothing about emptiness, so it never counts as blank.
        # Indexing [0] on an empty match list would raise IndexError instead.
        # NOTE(review): presumably documents without these elements should still be
        # treated as having content so they keep rendering - confirm.
        return bool(matches) and "".join(matches[0].itertext()).strip() == ""

    return not (is_blank("//akn:header") and is_blank("//akn:judgmentBody"))
138
+
133
139
  @cache
134
- def content_as_html(self, image_base_url: Optional[str] = None) -> str:
140
+ def content_as_html(self, image_base_url: Optional[str] = None) -> Optional[str]:
135
141
  """Convert the XML representation of the Document into HTML for rendering."""
142
+ if not self.has_content:
143
+ return None
136
144
 
137
145
  html_xslt_location = os.path.join(os.path.dirname(os.path.realpath(__file__)), "transforms", "html.xsl")
138
146
 
@@ -4,7 +4,3 @@ class CannotPublishUnpublishableDocument(Exception):
4
4
 
5
5
  class DocumentNotSafeForDeletion(Exception):
6
6
  """A document which is not safe for deletion cannot be deleted."""
7
-
8
-
9
- class InvalidDocumentURIException(Exception):
10
- """The document URI is not valid."""
@@ -10,7 +10,9 @@ from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
10
10
  if TYPE_CHECKING:
11
11
  from caselawclient.models.press_summaries import PressSummary
12
12
 
13
- from .documents import Document, DocumentURIString
13
+ from caselawclient.types import DocumentURIString
14
+
15
+ from .documents import Document
14
16
 
15
17
 
16
18
  class Judgment(NeutralCitationMixin, Document):
@@ -23,11 +23,6 @@ class NeutralCitationMixin(ABC):
23
23
 
24
24
  def __init__(self, document_noun: str, *args: Any, **kwargs: Any) -> None:
25
25
  self.attributes_to_validate: list[tuple[str, bool, str]] = self.attributes_to_validate + [
26
- (
27
- "has_ncn",
28
- True,
29
- f"This {document_noun} has no neutral citation number",
30
- ),
31
26
  (
32
27
  "has_valid_ncn",
33
28
  True,
@@ -50,4 +45,6 @@ class NeutralCitationMixin(ABC):
50
45
  @cached_property
51
46
  @deprecated("Legacy usage of NCNs is deprecated; you should be moving to the Identifiers framework")
52
47
  def has_valid_ncn(self) -> bool:
53
- return self.neutral_citation is not None and neutral_url(self.neutral_citation) is not None
48
+ if self.neutral_citation is None:
49
+ return True
50
+ return neutral_url(self.neutral_citation) is not None
@@ -8,8 +8,9 @@ from ds_caselaw_utils.types import NeutralCitationString
8
8
 
9
9
  from caselawclient.errors import DocumentNotFoundError
10
10
  from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
11
+ from caselawclient.types import DocumentURIString
11
12
 
12
- from .documents import Document, DocumentURIString
13
+ from .documents import Document
13
14
 
14
15
  if TYPE_CHECKING:
15
16
  from caselawclient.models.judgments import Judgment
@@ -13,9 +13,18 @@ from mypy_boto3_sns.client import SNSClient
13
13
  from mypy_boto3_sns.type_defs import MessageAttributeValueTypeDef
14
14
  from typing_extensions import NotRequired
15
15
 
16
+ from caselawclient.types import DocumentURIString
17
+
16
18
  env = environ.Env()
17
19
 
18
20
 
21
class S3PrefixString(str):
    """A string that is guaranteed to end in "/" so that, used as an S3 prefix, it behaves like a directory.

    Raises `RuntimeError` on creation if the content does not end in "/".
    """

    def __new__(cls, content: str) -> "S3PrefixString":
        # endswith() also rejects the empty string cleanly; indexing content[-1]
        # would raise IndexError there instead of the intended RuntimeError.
        if not content.endswith("/"):
            raise RuntimeError("S3 Prefixes must end in / so they behave like directories")
        return str.__new__(cls, content)
26
+
27
+
19
28
  class ParserInstructionsMetadataDict(TypedDict):
20
29
  name: Optional[str]
21
30
  cite: Optional[str]
@@ -58,8 +67,9 @@ def create_sns_client() -> SNSClient:
58
67
  return create_aws_client("sns")
59
68
 
60
69
 
61
- def uri_for_s3(uri: str) -> str:
62
- return uri.lstrip("/")
70
+ def uri_for_s3(uri: DocumentURIString) -> S3PrefixString:
71
+ """An S3 Prefix must end with / to avoid uksc/2004/1 matching uksc/2004/1000"""
72
+ return S3PrefixString(uri + "/")
63
73
 
64
74
 
65
75
  def generate_signed_asset_url(key: str) -> str:
@@ -79,7 +89,7 @@ def generate_signed_asset_url(key: str) -> str:
79
89
  )
80
90
 
81
91
 
82
- def check_docx_exists(uri: str) -> bool:
92
+ def check_docx_exists(uri: DocumentURIString) -> bool:
83
93
  """Does the docx for a document URI actually exist?"""
84
94
  bucket = env("PRIVATE_ASSET_BUCKET", None)
85
95
  s3_key = generate_docx_key(uri)
@@ -93,25 +103,25 @@ def check_docx_exists(uri: str) -> bool:
93
103
  raise
94
104
 
95
105
 
96
- def generate_docx_key(uri: str) -> str:
106
+ def generate_docx_key(uri: DocumentURIString) -> str:
97
107
  """from a canonical caselaw URI (eat/2022/1) return the S3 key of the associated docx"""
98
108
  return f"{uri}/{uri.replace('/', '_')}.docx"
99
109
 
100
110
 
101
- def generate_docx_url(uri: str) -> str:
111
+ def generate_docx_url(uri: DocumentURIString) -> str:
102
112
  """from a canonical caselaw URI (eat/2022/1) return a signed S3 link for the front end"""
103
113
  return generate_signed_asset_url(generate_docx_key(uri))
104
114
 
105
115
 
106
- def generate_pdf_url(uri: str) -> str:
116
+ def generate_pdf_url(uri: DocumentURIString) -> str:
107
117
  key = f"{uri}/{uri.replace('/', '_')}.pdf"
108
118
 
109
119
  return generate_signed_asset_url(key)
110
120
 
111
121
 
112
- def delete_from_bucket(uri: str, bucket: str) -> None:
122
+ def delete_from_bucket(uri: DocumentURIString, bucket: str) -> None:
113
123
  client = create_s3_client()
114
- response = client.list_objects(Bucket=bucket, Prefix=uri)
124
+ response = client.list_objects(Bucket=bucket, Prefix=uri_for_s3(uri))
115
125
 
116
126
  if response.get("Contents"):
117
127
  objects_to_delete: list[ObjectIdentifierTypeDef] = [{"Key": obj["Key"]} for obj in response.get("Contents", [])]
@@ -123,7 +133,7 @@ def delete_from_bucket(uri: str, bucket: str) -> None:
123
133
  )
124
134
 
125
135
 
126
- def publish_documents(uri: str) -> None:
136
+ def publish_documents(uri: DocumentURIString) -> None:
127
137
  """
128
138
  Copy assets from the unpublished bucket to the published one.
129
139
  Don't copy parser logs and package tar gz.
@@ -134,7 +144,7 @@ def publish_documents(uri: str) -> None:
134
144
  public_bucket = env("PUBLIC_ASSET_BUCKET")
135
145
  private_bucket = env("PRIVATE_ASSET_BUCKET")
136
146
 
137
- response = client.list_objects(Bucket=private_bucket, Prefix=uri)
147
+ response = client.list_objects(Bucket=private_bucket, Prefix=uri_for_s3(uri))
138
148
 
139
149
  for result in response.get("Contents", []):
140
150
  print(f"Contemplating copying {result!r}")
@@ -152,15 +162,15 @@ def publish_documents(uri: str) -> None:
152
162
  )
153
163
 
154
164
 
155
- def unpublish_documents(uri: str) -> None:
165
+ def unpublish_documents(uri: DocumentURIString) -> None:
156
166
  delete_from_bucket(uri, env("PUBLIC_ASSET_BUCKET"))
157
167
 
158
168
 
159
- def delete_documents_from_private_bucket(uri: str) -> None:
169
+ def delete_documents_from_private_bucket(uri: DocumentURIString) -> None:
160
170
  delete_from_bucket(uri, env("PRIVATE_ASSET_BUCKET"))
161
171
 
162
172
 
163
- def announce_document_event(uri: str, status: str, enrich: bool = False) -> None:
173
+ def announce_document_event(uri: DocumentURIString, status: str, enrich: bool = False) -> None:
164
174
  client = create_sns_client()
165
175
 
166
176
  message_attributes: dict[str, MessageAttributeValueTypeDef] = {}
@@ -186,17 +196,14 @@ def announce_document_event(uri: str, status: str, enrich: bool = False) -> None
186
196
  )
187
197
 
188
198
 
189
- def copy_assets(old_uri: str, new_uri: str) -> None:
199
+ def copy_assets(old_uri: DocumentURIString, new_uri: DocumentURIString) -> None:
190
200
  """
191
201
  Copy *unpublished* assets from one path to another,
192
202
  renaming DOCX and PDF files as appropriate.
193
203
  """
194
204
  client = create_s3_client()
195
205
  bucket = env("PRIVATE_ASSET_BUCKET")
196
- old_uri = uri_for_s3(old_uri)
197
- new_uri = uri_for_s3(new_uri)
198
-
199
- response = client.list_objects(Bucket=bucket, Prefix=old_uri)
206
+ response = client.list_objects(Bucket=bucket, Prefix=uri_for_s3(old_uri))
200
207
 
201
208
  for result in response.get("Contents", []):
202
209
  old_key = str(result["Key"])
@@ -212,7 +219,7 @@ def copy_assets(old_uri: str, new_uri: str) -> None:
212
219
  )
213
220
 
214
221
 
215
- def build_new_key(old_key: str, new_uri: str) -> str:
222
+ def build_new_key(old_key: str, new_uri: DocumentURIString) -> str:
216
223
  """Ensure that DOCX and PDF filenames are modified to reflect their new home
217
224
  as we get the name of the new S3 path"""
218
225
  old_filename = old_key.rsplit("/", 1)[-1]
@@ -224,7 +231,7 @@ def build_new_key(old_key: str, new_uri: str) -> str:
224
231
 
225
232
 
226
233
  def request_parse(
227
- uri: str,
234
+ uri: DocumentURIString,
228
235
  reference: Optional[str],
229
236
  parser_instructions: Optional[ParserInstructionsDict] = None,
230
237
  ) -> None:
@@ -4,8 +4,8 @@ import ds_caselaw_utils as caselawutils
4
4
  from ds_caselaw_utils.types import NeutralCitationString
5
5
 
6
6
  from caselawclient.errors import MarklogicAPIError
7
- from caselawclient.models.documents import DocumentURIString
8
7
  from caselawclient.models.utilities.aws import copy_assets
8
+ from caselawclient.types import DocumentURIString
9
9
 
10
10
  if TYPE_CHECKING:
11
11
  from caselawclient.Client import MarklogicApiClient
@@ -12,7 +12,7 @@ from ds_caselaw_utils.types import CourtCode, JurisdictionCode
12
12
  from lxml import etree
13
13
 
14
14
  from caselawclient.Client import MarklogicApiClient
15
- from caselawclient.models.documents import DocumentURIString
15
+ from caselawclient.types import DocumentURIString
16
16
  from caselawclient.xml_helpers import get_xpath_match_string
17
17
 
18
18
 
caselawclient/types.py ADDED
@@ -0,0 +1,24 @@
1
class InvalidDocumentURIException(Exception):
    """The document URI is not valid."""


class DocumentURIString(str):
    """
    This class checks that the string is actually a valid Document URI on creation. It does _not_ manipulate the string.

    Raises `InvalidDocumentURIException` if the content is empty, begins or ends with a slash, or contains a full stop.
    """

    def __new__(cls, content: str) -> "DocumentURIString":
        # An empty string is rejected explicitly; indexing content[0] would raise
        # IndexError rather than the documented exception.
        if not content:
            raise InvalidDocumentURIException(f'"{content}" is not a valid document URI; URIs cannot be empty.')

        # Check that the URI doesn't begin or end with a slash
        if content.startswith("/") or content.endswith("/"):
            raise InvalidDocumentURIException(
                f'"{content}" is not a valid document URI; URIs cannot begin or end with slashes.'
            )

        # Check that the URI doesn't contain a full stop
        if "." in content:
            raise InvalidDocumentURIException(
                f'"{content}" is not a valid document URI; URIs cannot contain full stops.'
            )

        # If everything is good, return as usual
        return str.__new__(cls, content)
@@ -0,0 +1,17 @@
1
+ xquery version "1.0-ml";
2
+
3
+ declare namespace xdmp="http://marklogic.com/xdmp";
4
+ declare variable $identifier_value as xs:string external;
5
+ declare variable $published_only as xs:int? external := 1;
6
+
7
+ let $published_query := if ($published_only) then " AND document_published = 'true'" else ""
8
+ let $query := "SELECT * from compiled_url_slugs WHERE (identifier_value = @value)" || $published_query
9
+
10
+ return xdmp:sql(
11
+ $query,
12
+ "map",
13
+ map:new((
14
+ map:entry("value", $identifier_value)
15
+ ))
16
+ )
17
+
@@ -7,7 +7,7 @@ checks. They are used to enforce appropriately typed variables being passed in t
7
7
  """
8
8
 
9
9
  from typing import Any, NewType, Optional, TypedDict
10
- from caselawclient.models.documents import DocumentURIString
10
+ from caselawclient.types import DocumentURIString
11
11
 
12
12
  MarkLogicDocumentURIString = NewType("MarkLogicDocumentURIString", str)
13
13
  MarkLogicDocumentVersionURIString = NewType("MarkLogicDocumentVersionURIString", MarkLogicDocumentURIString)
@@ -141,12 +141,18 @@ class ListJudgmentVersionsDict(MarkLogicAPIDict):
141
141
  uri: MarkLogicDocumentURIString
142
142
 
143
143
 
144
- # resolve_from_identifier.xqy
145
- class ResolveFromIdentifierDict(MarkLogicAPIDict):
144
+ # resolve_from_identifier_slug.xqy
145
+ class ResolveFromIdentifierSlugDict(MarkLogicAPIDict):
146
146
  identifier_uri: DocumentURIString
147
147
  published_only: Optional[int]
148
148
 
149
149
 
150
+ # resolve_from_identifier_value.xqy
151
+ class ResolveFromIdentifierValueDict(MarkLogicAPIDict):
152
+ identifier_value: str
153
+ published_only: Optional[int]
154
+
155
+
150
156
  # set_boolean_property.xqy
151
157
  class SetBooleanPropertyDict(MarkLogicAPIDict):
152
158
  name: str
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ds-caselaw-marklogic-api-client
3
- Version: 29.1.1
3
+ Version: 30.0.0
4
4
  Summary: An API client for interacting with the underlying data in Find Caselaw.
5
5
  Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
6
6
  Keywords: national archives,caselaw
@@ -1,15 +1,15 @@
1
- caselawclient/Client.py,sha256=6BaVpKSvMcNhSaDXzTkN1h4iV7KFrY7SgSHNGqGQ_2o,43863
1
+ caselawclient/Client.py,sha256=JBtcmyuY8tHm1pj62BkmtLSjsZ825ver9sefTLhltIc,45005
2
2
  caselawclient/__init__.py,sha256=DY-caubLDQWWingSdsBWgovDNXh8KcnkI6kwz08eIFk,612
3
3
  caselawclient/client_helpers/__init__.py,sha256=fyDNKCdrTb2N0Ks23YDhmvlXKfLTHnYQCXhnZb-QQbg,3832
4
4
  caselawclient/client_helpers/search_helpers.py,sha256=R99HyRLeYHgsw2L3DOidEqlKLLvs6Tga5rKTuWQViig,1525
5
5
  caselawclient/content_hash.py,sha256=0cPC4OoABq0SC2wYFX9-24DodNigeOqksDxgxQH_hUA,2221
6
6
  caselawclient/errors.py,sha256=JC16fEGq_MRJX-_KFzfINCV2Cqx8o6OWOt3C16rQd84,3142
7
- caselawclient/factories.py,sha256=6-xZMVmvtXA8AnyWJgJTums1EWfM6lPIhrWQu0NopJo,4472
8
- caselawclient/identifier_resolution.py,sha256=IOqrZcIHoHhNOCAkNveOBcWddBNpkOB8cz1r0zFa8mQ,1829
7
+ caselawclient/factories.py,sha256=yJmecrJNmzvI0_gJZFrpiONI6qt2jTFId7cXCA68-iY,4503
8
+ caselawclient/identifier_resolution.py,sha256=pqapUH8oiZF3ie-s_CI0hvZwH__JVcjJ4VxkpBxswmA,2354
9
9
  caselawclient/models/__init__.py,sha256=kd23EUpvaC7aLHdgk8farqKAQEx3lf7RvNT2jEatvlg,68
10
- caselawclient/models/documents/__init__.py,sha256=Lzb18MyCPLlG0Y4EdnjZvnGFSSpsNs_BUgJA6BQqL28,19520
11
- caselawclient/models/documents/body.py,sha256=7Sj6lnddcAM8VAeecAS2VEhQHR9CB3G0fL8TAXz7EXw,5588
12
- caselawclient/models/documents/exceptions.py,sha256=rw1xId16vBKvBImgFmFUpeFgKqU7VTNtVLIEVBPGKyk,374
10
+ caselawclient/models/documents/__init__.py,sha256=SKYgOOpO4i-lhTWSB16eI6052GEh5ZlqvK3Dggd3sOg,18644
11
+ caselawclient/models/documents/body.py,sha256=2rhNzCsXU13n4nw8m_GU2f_FyYGE8wUYOecBqFmZFHo,5999
12
+ caselawclient/models/documents/exceptions.py,sha256=Mz1P8uNqf5w6uLnRwJt6xK7efsVqtd5VA-WXUUH7QLk,285
13
13
  caselawclient/models/documents/statuses.py,sha256=Cp4dTQmJOtsU41EJcxy5dV1841pGD2PNWH0VrkDEv4Q,579
14
14
  caselawclient/models/documents/transforms/html.xsl,sha256=oSSO-IBX4qLiSWexQYmWJfGNevF09aCBx4D1NYqXxpo,38322
15
15
  caselawclient/models/documents/xml.py,sha256=HlmPb63lLMnySSOLP4iexcAyQiLByKBZtTd25f8sY8M,1268
@@ -18,19 +18,20 @@ caselawclient/models/identifiers/fclid.py,sha256=pTO586ra0sr4DbjHSxuI8UlxfNXLm9n
18
18
  caselawclient/models/identifiers/neutral_citation.py,sha256=3Jw1_-NmGfGmrWGFSzLdTHBYHIHq4tPkF8U7Jba-jGo,1848
19
19
  caselawclient/models/identifiers/press_summary_ncn.py,sha256=r55-qgi9LDnGxY8vTKijzotGknA6mNLpu55QQTV8Lxo,652
20
20
  caselawclient/models/identifiers/unpacker.py,sha256=xvp480QESbN36NEc6qeo-orqOBq6WchnLI7thY7A1qs,2156
21
- caselawclient/models/judgments.py,sha256=xFjfOspa9ZL29gvvGVNq11JC7h-LKebFMrQYvVIIoEI,1868
22
- caselawclient/models/neutral_citation_mixin.py,sha256=LDaxNndLcTKsjJCyEKO1kGTJ6YD6h-6SzQVE0-gwPSI,2208
23
- caselawclient/models/press_summaries.py,sha256=bEqJxu-7eBLhwulOsDXYDl2ptIp3RkkMpOcdVvWB5ds,1836
21
+ caselawclient/models/judgments.py,sha256=hYPmzWcxS_Pi87vWEezLI5geQqm1tRh_HtGqHpm49Zg,1900
22
+ caselawclient/models/neutral_citation_mixin.py,sha256=jAac3PPuWyPdj9N-n-U_JfwkbgbSIXaqFVQahfu95do,2086
23
+ caselawclient/models/press_summaries.py,sha256=f0Qyv5_7K8tWv-HVGv2QHlS4WMh5Dh4ZhbYJOJYD9lw,1867
24
24
  caselawclient/models/utilities/__init__.py,sha256=u3yIhbTjFQ1JJyAm5wsMEBswWl4t6Z7UMORF5FqC2xQ,1257
25
- caselawclient/models/utilities/aws.py,sha256=d-7puHSW8lFLLhaXhAStzNai4_NEMAiPGCNEFUt0Elg,8191
25
+ caselawclient/models/utilities/aws.py,sha256=E4nFcNC2xxPUv0Xkfi2XTO3FyIH6jaAy9pgsvR48Eg8,8759
26
26
  caselawclient/models/utilities/dates.py,sha256=WwORxVjUHM1ZFcBF6Qtwo3Cj0sATsnSECkUZ6ls1N1Q,492
27
- caselawclient/models/utilities/move.py,sha256=Rsx1eGHVjbGz0WMVDjy8b_5t4Ig8aP55sLudL07MVUs,3621
27
+ caselawclient/models/utilities/move.py,sha256=MXdUqkSiyqRb8YKs_66B6ICWn8EWM6DiJV95fuJO1Us,3610
28
28
  caselawclient/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
29
  caselawclient/responses/__init__.py,sha256=2-5NJn_PXPTje_W4dHeHYaNRN6vXK4UcB9eLLNUAKa4,67
30
30
  caselawclient/responses/search_response.py,sha256=Z76Zj4VvM-EV_vdiehv2-Jfkr9HZD3SvCTlRrUB_cyE,1951
31
- caselawclient/responses/search_result.py,sha256=2yR3FP4CQsVymE7RrOMbh1owjYaRTrqkjMObbIkSlhE,8216
31
+ caselawclient/responses/search_result.py,sha256=hotJHJ9wQusjb4PjZm1DDZv2HEsXWTtZU5TOTj2T0rw,8205
32
32
  caselawclient/responses/xsl/search_match.xsl,sha256=4Sv--MrwBd7J48E9aI7jlFSXGlNi4dBqgzJ3bdMJ_ZU,1018
33
33
  caselawclient/search_parameters.py,sha256=nR-UC1aWZbdXzXBrVDaHECU4Ro8Zi4JZATtgrpAVsKY,3342
34
+ caselawclient/types.py,sha256=vVOK78bFsnHXdOGx1899biR2QiCSVNBKoDbziJPCb68,920
34
35
  caselawclient/xml_helpers.py,sha256=FEtE8gxaEZmcgua-Xu8awPmiOm9K58OSabEYVGpiVEY,493
35
36
  caselawclient/xquery/break_judgment_checkout.xqy,sha256=rISzoBKxQKrP5ZRdCSoRqOXW8T_NDBSZRFjOXo_H3ns,220
36
37
  caselawclient/xquery/checkin_judgment.xqy,sha256=QeGqO3kL-q0UrjopCVU0lCbkwbyoc5SuNLYFAIbbyMg,197
@@ -59,7 +60,8 @@ caselawclient/xquery/get_version_annotation.xqy,sha256=pFDMGA9SxI59iUPaoAeUsq23k
59
60
  caselawclient/xquery/get_version_created.xqy,sha256=bRweaXFtwMBNzL16SlOdiOxHkbqNUwpwDHLxpZYVCh0,250
60
61
  caselawclient/xquery/insert_document.xqy,sha256=iP2xTaLGa-u6X9KfS1yJ6yPCKQUWQFYdEW1S4YcMY7w,531
61
62
  caselawclient/xquery/list_judgment_versions.xqy,sha256=WShga8igeD21hSLfVSvCOiDMPDhNH6KGf1OW6G0SAkY,190
62
- caselawclient/xquery/resolve_from_identifier.xqy,sha256=Fa-RSw9ZwD__BmT5LLJ0J0HcDstDbedkEccv45M3L4g,484
63
+ caselawclient/xquery/resolve_from_identifier_slug.xqy,sha256=Fa-RSw9ZwD__BmT5LLJ0J0HcDstDbedkEccv45M3L4g,484
64
+ caselawclient/xquery/resolve_from_identifier_value.xqy,sha256=7uP3DnRi67qSp0aUhW6Cv_GA8BQGw6GuvtAghjrT7Z4,493
63
65
  caselawclient/xquery/set_boolean_property.xqy,sha256=8Vg3yDWqeDynUJQHw2OF4daDIKTnp8ARol1_OCqY0Dk,355
64
66
  caselawclient/xquery/set_metadata_citation.xqy,sha256=ImwijXowvOCiH_br_LepnKsEpys9tg4Cf3uz6MoC5-c,659
65
67
  caselawclient/xquery/set_metadata_court.xqy,sha256=xQGR3e4pdJuDPMlzdAdzrBDSeQbEFiLVIm2z_KQI_Ds,996
@@ -77,8 +79,8 @@ caselawclient/xquery/validate_all_documents.xqy,sha256=z_0YEXmRcZ-FaJM0ouKiTjdI4
77
79
  caselawclient/xquery/validate_document.xqy,sha256=PgaDcnqCRJPIVqfmWsNlXmCLNKd21qkJrvY1RtNP7eA,140
78
80
  caselawclient/xquery/xslt.xqy,sha256=w57wNijH3dkwHkpKeAxqjlghVflQwo8cq6jS_sm-erM,199
79
81
  caselawclient/xquery/xslt_transform.xqy,sha256=smyFFxqmtkuOzBd2l7uw6K2oAsYctudrP8omdv_XNAM,2463
80
- caselawclient/xquery_type_dicts.py,sha256=kybL-YzwK34Fr6MeWfqVOJHYrs0ZNeDWXDsp8o2Yb1U,6114
81
- ds_caselaw_marklogic_api_client-29.1.1.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
82
- ds_caselaw_marklogic_api_client-29.1.1.dist-info/METADATA,sha256=SaMj0ydb-vZeW6cnwnwr94lGc_XkADBIDeUdWtDlEjM,4264
83
- ds_caselaw_marklogic_api_client-29.1.1.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
84
- ds_caselaw_marklogic_api_client-29.1.1.dist-info/RECORD,,
82
+ caselawclient/xquery_type_dicts.py,sha256=qZ_bUiceoHw8c02FAFWe75zdpmK8GYUJzi2q-bc1NXA,6266
83
+ ds_caselaw_marklogic_api_client-30.0.0.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
84
+ ds_caselaw_marklogic_api_client-30.0.0.dist-info/METADATA,sha256=6DnjxKcXIc01ysu22YKZBC5mek5oQtmxiRnTEljxzN0,4264
85
+ ds_caselaw_marklogic_api_client-30.0.0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
86
+ ds_caselaw_marklogic_api_client-30.0.0.dist-info/RECORD,,