ds-caselaw-marklogic-api-client 29.2.0__py3-none-any.whl → 31.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ds-caselaw-marklogic-api-client might be problematic. Click here for more details.

caselawclient/Client.py CHANGED
@@ -25,12 +25,12 @@ from caselawclient.models.documents import (
25
25
  DOCUMENT_COLLECTION_URI_JUDGMENT,
26
26
  DOCUMENT_COLLECTION_URI_PRESS_SUMMARY,
27
27
  Document,
28
- DocumentURIString,
29
28
  )
30
29
  from caselawclient.models.judgments import Judgment
31
30
  from caselawclient.models.press_summaries import PressSummary
32
31
  from caselawclient.models.utilities import move
33
32
  from caselawclient.search_parameters import SearchParameters
33
+ from caselawclient.types import DocumentURIString
34
34
  from caselawclient.xquery_type_dicts import (
35
35
  MarkLogicDocumentURIString,
36
36
  MarkLogicDocumentVersionURIString,
@@ -1203,17 +1203,40 @@ class MarklogicApiClient:
1203
1203
 
1204
1204
  return results
1205
1205
 
1206
- def resolve_from_identifier(self, identifier_uri: str, published_only: bool = True) -> IdentifierResolutions:
1206
+ def resolve_from_identifier_slug(self, identifier_uri: str, published_only: bool = True) -> IdentifierResolutions:
1207
1207
  """Given a PUI/EUI url, look up the precomputed slug and return the
1208
1208
  MarkLogic document URIs which match that slug. Multiple returns should be anticipated"""
1209
- vars: query_dicts.ResolveFromIdentifierDict = {
1209
+ vars: query_dicts.ResolveFromIdentifierSlugDict = {
1210
1210
  "identifier_uri": DocumentURIString(identifier_uri),
1211
1211
  "published_only": int(published_only),
1212
1212
  }
1213
1213
  raw_results: list[str] = get_multipart_strings_from_marklogic_response(
1214
1214
  self._send_to_eval(
1215
1215
  vars,
1216
- "resolve_from_identifier.xqy",
1216
+ "resolve_from_identifier_slug.xqy",
1217
+ ),
1218
+ )
1219
+ return IdentifierResolutions.from_marklogic_output(raw_results)
1220
+
1221
+ def resolve_from_identifier(self, identifier_uri: str, published_only: bool = True) -> IdentifierResolutions:
1222
+ warnings.warn(
1223
+ "resolve_from_identifier deprecated, use resolve_from_identifier_slug instead", DeprecationWarning
1224
+ )
1225
+ return self.resolve_from_identifier(identifier_uri, published_only)
1226
+
1227
+ def resolve_from_identifier_value(
1228
+ self, identifier_value: str, published_only: bool = True
1229
+ ) -> IdentifierResolutions:
1230
+ """Given an identifier value (for example a neutral citation), return the
1231
+ MarkLogic document URIs whose identifiers have that value. Multiple returns should be anticipated"""
1232
+ vars: query_dicts.ResolveFromIdentifierValueDict = {
1233
+ "identifier_value": identifier_value,
1234
+ "published_only": int(published_only),
1235
+ }
1236
+ raw_results: list[str] = get_multipart_strings_from_marklogic_response(
1237
+ self._send_to_eval(
1238
+ vars,
1239
+ "resolve_from_identifier_value.xqy",
1217
1240
  ),
1218
1241
  )
1219
1242
  return IdentifierResolutions.from_marklogic_output(raw_results)
@@ -1,17 +1,29 @@
1
1
  import datetime
2
+ import json
2
3
  from typing import Any, Optional
3
4
  from unittest.mock import Mock
4
5
 
5
6
  from typing_extensions import TypeAlias
6
7
 
7
8
  from caselawclient.Client import MarklogicApiClient
8
- from caselawclient.models.documents import Document, DocumentURIString
9
+ from caselawclient.identifier_resolution import IdentifierResolution, IdentifierResolutions
10
+ from caselawclient.models.documents import Document
9
11
  from caselawclient.models.documents.body import DocumentBody
10
12
  from caselawclient.models.judgments import Judgment
11
13
  from caselawclient.models.press_summaries import PressSummary
12
14
  from caselawclient.responses.search_result import SearchResult, SearchResultMetadata
15
+ from caselawclient.types import DocumentURIString
13
16
 
14
- DEFAULT_DOCUMENT_BODY_XML = "<akomantoso>This is some XML of a judgment.</akomantoso>"
17
+ DEFAULT_DOCUMENT_BODY_XML = """<akomaNtoso xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0" xmlns:uk="https://caselaw.nationalarchives.gov.uk/akn">
18
+ <judgment name="decision">
19
+ <meta/><header/>
20
+ <judgmentBody>
21
+ <decision>
22
+ <p>This is a document.</p>
23
+ </decision>
24
+ </judgmentBody>
25
+ </judgment>
26
+ </akomaNtoso>"""
15
27
 
16
28
 
17
29
  class DocumentBodyFactory:
@@ -55,7 +67,6 @@ class DocumentFactory:
55
67
  def build(
56
68
  cls,
57
69
  uri: DocumentURIString = DocumentURIString("test/2023/123"),
58
- html: str = "<p>This is a judgment.</p>",
59
70
  api_client: Optional[MarklogicApiClient] = None,
60
71
  **kwargs: Any,
61
72
  ) -> target_class:
@@ -65,7 +76,6 @@ class DocumentFactory:
65
76
  api_client.get_property_as_node.return_value = None
66
77
 
67
78
  document = cls.target_class(uri, api_client=api_client)
68
- document.content_as_html = Mock(return_value=html) # type: ignore[method-assign]
69
79
  document.body = kwargs.pop("body") if "body" in kwargs else DocumentBodyFactory.build()
70
80
 
71
81
  for param_name, default_value in cls.PARAMS_MAP.items():
@@ -132,3 +142,33 @@ class SearchResultFactory(SimpleFactory):
132
142
  "metadata": SearchResultMetadataFactory.build(),
133
143
  "is_failure": False,
134
144
  }
145
+
146
+
147
+ class IdentifierResolutionFactory:
148
+ @classmethod
149
+ def build(
150
+ self,
151
+ resolution_uuid: Optional[str] = None,
152
+ document_uri: Optional[str] = None,
153
+ identifier_slug: Optional[str] = None,
154
+ published: Optional[bool] = True,
155
+ namespace: Optional[str] = None,
156
+ value: Optional[str] = None,
157
+ ) -> IdentifierResolution:
158
+ raw_resolution = {
159
+ "documents.compiled_url_slugs.identifier_uuid": resolution_uuid or "24b9a384-8bcf-4f20-996a-5c318f8dc657",
160
+ "documents.compiled_url_slugs.document_uri": document_uri or "/ewca/civ/2003/547.xml",
161
+ "documents.compiled_url_slugs.identifier_slug": identifier_slug or "ewca/civ/2003/54721",
162
+ "documents.compiled_url_slugs.document_published": "true" if published else "false",
163
+ "documents.compiled_url_slugs.identifier_namespace": namespace or "ukncn",
164
+ "documents.compiled_url_slugs.identifier_value": value or "[2003] EWCA 54721 (Civ)",
165
+ }
166
+ return IdentifierResolution.from_marklogic_output(json.dumps(raw_resolution))
167
+
168
+
169
+ class IdentifierResolutionsFactory:
170
+ @classmethod
171
+ def build(self, resolutions: Optional[list[IdentifierResolution]] = None) -> IdentifierResolutions:
172
+ if resolutions is None:
173
+ resolutions = [IdentifierResolutionFactory.build()]
174
+ return IdentifierResolutions(resolutions)
@@ -1,7 +1,9 @@
1
1
  import json
2
2
  from typing import NamedTuple
3
3
 
4
- from caselawclient.models.documents import DocumentURIString
4
+ from caselawclient.models.identifiers import Identifier
5
+ from caselawclient.models.identifiers.unpacker import IDENTIFIER_NAMESPACE_MAP
6
+ from caselawclient.types import DocumentURIString
5
7
  from caselawclient.xquery_type_dicts import MarkLogicDocumentURIString
6
8
 
7
9
 
@@ -12,7 +14,7 @@ class IdentifierResolutions(list["IdentifierResolution"]):
12
14
  MarkLogic returns a list of dictionaries; IdentifierResolution handles a single dictionary
13
15
  which corresponds to a single identifier to MarkLogic document mapping.
14
16
 
15
- see `xquery/resolve_from_identifier.xqy` and `resolve_from_identifier` in `Client.py`
17
+ see `xquery/resolve_from_identifier_slug.xqy` and `resolve_from_identifier_slug` in `Client.py`
16
18
  """
17
19
 
18
20
  @staticmethod
@@ -31,13 +33,20 @@ class IdentifierResolution(NamedTuple):
31
33
  document_uri: MarkLogicDocumentURIString
32
34
  identifier_slug: DocumentURIString
33
35
  document_published: bool
36
+ identifier_value: str
37
+ identifier_namespace: str
38
+ identifier_type: type[Identifier]
34
39
 
35
40
  @staticmethod
36
41
  def from_marklogic_output(raw_row: str) -> "IdentifierResolution":
37
42
  row = json.loads(raw_row)
43
+ identifier_namespace = row["documents.compiled_url_slugs.identifier_namespace"]
38
44
  return IdentifierResolution(
39
45
  identifier_uuid=row["documents.compiled_url_slugs.identifier_uuid"],
40
46
  document_uri=MarkLogicDocumentURIString(row["documents.compiled_url_slugs.document_uri"]),
41
47
  identifier_slug=DocumentURIString(row["documents.compiled_url_slugs.identifier_slug"]),
42
48
  document_published=row["documents.compiled_url_slugs.document_published"] == "true",
49
+ identifier_value=row["documents.compiled_url_slugs.identifier_value"],
50
+ identifier_namespace=identifier_namespace,
51
+ identifier_type=IDENTIFIER_NAMESPACE_MAP[identifier_namespace],
43
52
  )
@@ -6,15 +6,14 @@ from typing import TYPE_CHECKING, Any, Optional
6
6
  from ds_caselaw_utils import courts
7
7
  from ds_caselaw_utils.courts import CourtNotFoundException
8
8
  from ds_caselaw_utils.types import NeutralCitationString
9
- from lxml import html as html_parser
10
9
  from requests_toolbelt.multipart import decoder
11
10
 
12
11
  from caselawclient.errors import (
13
12
  DocumentNotFoundError,
14
- GatewayTimeoutError,
15
13
  NotSupportedOnVersion,
16
14
  OnlySupportedOnVersion,
17
15
  )
16
+ from caselawclient.identifier_resolution import IdentifierResolutions
18
17
  from caselawclient.models.identifiers import Identifier
19
18
  from caselawclient.models.identifiers.fclid import FindCaseLawIdentifier, FindCaseLawIdentifierSchema
20
19
  from caselawclient.models.identifiers.unpacker import unpack_all_identifiers_from_etree
@@ -30,9 +29,10 @@ from caselawclient.models.utilities.aws import (
30
29
  request_parse,
31
30
  unpublish_documents,
32
31
  )
32
+ from caselawclient.types import DocumentURIString
33
33
 
34
34
  from .body import DocumentBody
35
- from .exceptions import CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion, InvalidDocumentURIException
35
+ from .exceptions import CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion
36
36
  from .statuses import DOCUMENT_STATUS_HOLD, DOCUMENT_STATUS_IN_PROGRESS, DOCUMENT_STATUS_NEW, DOCUMENT_STATUS_PUBLISHED
37
37
 
38
38
  MINIMUM_ENRICHMENT_TIME = datetime.timedelta(minutes=20)
@@ -49,28 +49,6 @@ if TYPE_CHECKING:
49
49
  from caselawclient.Client import MarklogicApiClient
50
50
 
51
51
 
52
- class DocumentURIString(str):
53
- """
54
- This class checks that the string is actually a valid Document URI on creation. It does _not_ manipulate the string.
55
- """
56
-
57
- def __new__(cls, content: str) -> "DocumentURIString":
58
- # Check that the URI doesn't begin or end with a slash
59
- if content[0] == "/" or content[-1] == "/":
60
- raise InvalidDocumentURIException(
61
- f'"{content}" is not a valid document URI; URIs cannot begin or end with slashes.'
62
- )
63
-
64
- # Check that the URI doesn't contain a full stop
65
- if "." in content:
66
- raise InvalidDocumentURIException(
67
- f'"{content}" is not a valid document URI; URIs cannot contain full stops.'
68
- )
69
-
70
- # If everything is good, return as usual
71
- return str.__new__(cls, content)
72
-
73
-
74
52
  class Document:
75
53
  """
76
54
  A base class from which all other document types are extensions. This class includes the essential methods for
@@ -273,39 +251,6 @@ class Document:
273
251
  "Is this document a potentially historic version of a document, or is it the main document itself?"
274
252
  return extract_version(self.uri) != 0
275
253
 
276
- def content_as_html(
277
- self,
278
- version_uri: Optional[DocumentURIString] = None,
279
- query: Optional[str] = None,
280
- ) -> str:
281
- try:
282
- results = self.api_client.eval_xslt(
283
- self.uri,
284
- version_uri,
285
- show_unpublished=True,
286
- query=query,
287
- )
288
- multipart_data = decoder.MultipartDecoder.from_response(results)
289
- return str(multipart_data.parts[0].text)
290
- except GatewayTimeoutError as e:
291
- if query is not None:
292
- warnings.warn(
293
- (
294
- "Gateway timeout when getting content with query"
295
- "highlighting for document %s, version %s, and query"
296
- '"%s", falling back to unhighlighted content...'
297
- )
298
- % (self.uri, version_uri, query),
299
- GatewayTimeoutGettingHTMLWithQuery,
300
- )
301
- return self.content_as_html(version_uri)
302
- raise e
303
-
304
- def number_of_mentions(self, query: str) -> int:
305
- html = self.content_as_html(query=query)
306
- tree = html_parser.fromstring(html.encode("utf-8"))
307
- return len(tree.findall(".//mark"))
308
-
309
254
  @cached_property
310
255
  def is_failure(self) -> bool:
311
256
  """
@@ -550,3 +495,21 @@ class Document:
550
495
  return getattr(self.body, name)
551
496
  except Exception:
552
497
  raise AttributeError(f"Neither 'Document' nor 'DocumentBody' objects have an attribute '{name}'")
498
+
499
+ def linked_document_resolutions(self, namespaces: list[str], only_published: bool = True) -> IdentifierResolutions:
500
+ """Get identifier resolutions for other documents which share this document's neutral citation, limited to the given identifier namespaces."""
501
+ if not hasattr(self, "neutral_citation") or not self.neutral_citation:
502
+ return IdentifierResolutions([])
503
+
504
+ resolutions = self.api_client.resolve_from_identifier_value(self.neutral_citation)
505
+ if only_published:
506
+ resolutions = resolutions.published()
507
+
508
+ # only documents which aren't this one and whose identifier namespace is one of those requested
509
+ return IdentifierResolutions(
510
+ [
511
+ resolution
512
+ for resolution in resolutions
513
+ if resolution.document_uri != self.uri.as_marklogic() and resolution.identifier_namespace in namespaces
514
+ ]
515
+ )
@@ -12,6 +12,11 @@ from caselawclient.models.utilities.dates import parse_string_date_as_utc
12
12
 
13
13
  from .xml import XML
14
14
 
15
+ DEFAULT_NAMESPACES = {
16
+ "uk": "https://caselaw.nationalarchives.gov.uk/akn",
17
+ "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
18
+ }
19
+
15
20
 
16
21
  class UnparsableDate(Warning):
17
22
  pass
@@ -26,35 +31,25 @@ class DocumentBody:
26
31
  self._xml = XML(xml_bytestring=xml_bytestring)
27
32
  """ This is an instance of the `Document.XML` class for manipulation of the XML document itself. """
28
33
 
29
- def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str]) -> str:
34
+ def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> str:
30
35
  return self._xml.get_xpath_match_string(xpath, namespaces)
31
36
 
37
+ def get_xpath_match_strings(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[str]:
38
+ return self._xml.get_xpath_match_strings(xpath, namespaces)
39
+
32
40
  @cached_property
33
41
  def name(self) -> str:
34
- return self._xml.get_xpath_match_string(
35
- "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value",
36
- {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
42
+ return self.get_xpath_match_string(
43
+ "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value"
37
44
  )
38
45
 
39
46
  @cached_property
40
47
  def court(self) -> str:
41
- return self._xml.get_xpath_match_string(
42
- "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()",
43
- {
44
- "uk": "https://caselaw.nationalarchives.gov.uk/akn",
45
- "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
46
- },
47
- )
48
+ return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()")
48
49
 
49
50
  @cached_property
50
51
  def jurisdiction(self) -> str:
51
- return self._xml.get_xpath_match_string(
52
- "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()",
53
- {
54
- "uk": "https://caselaw.nationalarchives.gov.uk/akn",
55
- "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
56
- },
57
- )
52
+ return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()")
58
53
 
59
54
  @property
60
55
  def court_and_jurisdiction_identifier_string(self) -> CourtCode:
@@ -64,9 +59,8 @@ class DocumentBody:
64
59
 
65
60
  @cached_property
66
61
  def document_date_as_string(self) -> str:
67
- return self._xml.get_xpath_match_string(
62
+ return self.get_xpath_match_string(
68
63
  "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRdate/@date",
69
- {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
70
64
  )
71
65
 
72
66
  @cached_property
@@ -90,9 +84,8 @@ class DocumentBody:
90
84
  name: Optional[str] = None,
91
85
  ) -> list[datetime.datetime]:
92
86
  name_filter = f"[@name='{name}']" if name else ""
93
- iso_datetimes = self._xml.get_xpath_match_strings(
87
+ iso_datetimes = self.get_xpath_match_strings(
94
88
  f"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRManifestation/akn:FRBRdate{name_filter}/@date",
95
- {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
96
89
  )
97
90
 
98
91
  return [parse_string_date_as_utc(event, pytz.UTC) for event in iso_datetimes]
@@ -130,9 +123,24 @@ class DocumentBody:
130
123
  def content_as_xml(self) -> str:
131
124
  return self._xml.xml_as_string
132
125
 
126
+ @cached_property
127
+ def has_content(self) -> bool:
128
+ """If we do not have a word document, the XML will not contain
129
+ the contents of the judgment, but will contain a preamble."""
130
+ trailing_tags = self._xml.xml_as_tree.xpath("//*[preceding::akn:meta]", namespaces=DEFAULT_NAMESPACES)
131
+ for tag in trailing_tags:
132
+ if tag.tail and tag.tail.strip():
133
+ return True
134
+ if tag.text and tag.text.strip():
135
+ return True
136
+
137
+ return False
138
+
133
139
  @cache
134
- def content_as_html(self, image_base_url: Optional[str] = None) -> str:
140
+ def content_as_html(self, image_base_url: Optional[str] = None) -> Optional[str]:
135
141
  """Convert the XML representation of the Document into HTML for rendering."""
142
+ if not self.has_content:
143
+ return None
136
144
 
137
145
  html_xslt_location = os.path.join(os.path.dirname(os.path.realpath(__file__)), "transforms", "html.xsl")
138
146
 
@@ -4,7 +4,3 @@ class CannotPublishUnpublishableDocument(Exception):
4
4
 
5
5
  class DocumentNotSafeForDeletion(Exception):
6
6
  """A document which is not safe for deletion cannot be deleted."""
7
-
8
-
9
- class InvalidDocumentURIException(Exception):
10
- """The document URI is not valid."""
@@ -5,12 +5,15 @@ from typing import TYPE_CHECKING, Any, Optional
5
5
  from ds_caselaw_utils.types import NeutralCitationString
6
6
 
7
7
  from caselawclient.errors import DocumentNotFoundError
8
+ from caselawclient.identifier_resolution import IdentifierResolutions
8
9
  from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
9
10
 
10
11
  if TYPE_CHECKING:
11
12
  from caselawclient.models.press_summaries import PressSummary
12
13
 
13
- from .documents import Document, DocumentURIString
14
+ from caselawclient.types import DocumentURIString
15
+
16
+ from .documents import Document
14
17
 
15
18
 
16
19
  class Judgment(NeutralCitationMixin, Document):
@@ -49,3 +52,7 @@ class Judgment(NeutralCitationMixin, Document):
49
52
  return PressSummary(uri, self.api_client)
50
53
  except DocumentNotFoundError:
51
54
  return None
55
+
56
+ @cached_property
57
+ def linked_press_summaries(self, only_published: bool = True) -> "IdentifierResolutions":
58
+ return self.linked_document_resolutions(["uksummaryofncn"], only_published)
@@ -7,9 +7,11 @@ from typing import TYPE_CHECKING, Any, Optional
7
7
  from ds_caselaw_utils.types import NeutralCitationString
8
8
 
9
9
  from caselawclient.errors import DocumentNotFoundError
10
+ from caselawclient.identifier_resolution import IdentifierResolutions
10
11
  from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
12
+ from caselawclient.types import DocumentURIString
11
13
 
12
- from .documents import Document, DocumentURIString
14
+ from .documents import Document
13
15
 
14
16
  if TYPE_CHECKING:
15
17
  from caselawclient.models.judgments import Judgment
@@ -50,3 +52,6 @@ class PressSummary(NeutralCitationMixin, Document):
50
52
  return Judgment(uri, self.api_client)
51
53
  except DocumentNotFoundError:
52
54
  return None
55
+
56
+ def linked_judgments(self, only_published: bool = True) -> "IdentifierResolutions":
57
+ return self.linked_document_resolutions(["ukncn"], only_published)
@@ -2,7 +2,7 @@ import datetime
2
2
  import json
3
3
  import logging
4
4
  import uuid
5
- from typing import TYPE_CHECKING, Any, Literal, Optional, TypedDict, overload
5
+ from typing import Any, Literal, Optional, TypedDict, overload
6
6
 
7
7
  import boto3
8
8
  import botocore.client
@@ -13,10 +13,7 @@ from mypy_boto3_sns.client import SNSClient
13
13
  from mypy_boto3_sns.type_defs import MessageAttributeValueTypeDef
14
14
  from typing_extensions import NotRequired
15
15
 
16
- if TYPE_CHECKING:
17
- from caselawclient.models.documents import DocumentURIString
18
- else:
19
- DocumentURIString = None
16
+ from caselawclient.types import DocumentURIString
20
17
 
21
18
  env = environ.Env()
22
19
 
@@ -4,8 +4,8 @@ import ds_caselaw_utils as caselawutils
4
4
  from ds_caselaw_utils.types import NeutralCitationString
5
5
 
6
6
  from caselawclient.errors import MarklogicAPIError
7
- from caselawclient.models.documents import DocumentURIString
8
7
  from caselawclient.models.utilities.aws import copy_assets
8
+ from caselawclient.types import DocumentURIString
9
9
 
10
10
  if TYPE_CHECKING:
11
11
  from caselawclient.Client import MarklogicApiClient
@@ -12,7 +12,7 @@ from ds_caselaw_utils.types import CourtCode, JurisdictionCode
12
12
  from lxml import etree
13
13
 
14
14
  from caselawclient.Client import MarklogicApiClient
15
- from caselawclient.models.documents import DocumentURIString
15
+ from caselawclient.types import DocumentURIString
16
16
  from caselawclient.xml_helpers import get_xpath_match_string
17
17
 
18
18
 
caselawclient/types.py ADDED
@@ -0,0 +1,31 @@
1
+ class InvalidDocumentURIException(Exception):
2
+ """The document URI is not valid."""
3
+
4
+
5
+ class MarkLogicDocumentURIString(str):
6
+ pass
7
+
8
+
9
+ class DocumentURIString(str):
10
+ """
11
+ This class checks that the string is actually a valid Document URI on creation. It does _not_ manipulate the string.
12
+ """
13
+
14
+ def __new__(cls, content: str) -> "DocumentURIString":
15
+ # Check that the URI doesn't begin or end with a slash
16
+ if content[0] == "/" or content[-1] == "/":
17
+ raise InvalidDocumentURIException(
18
+ f'"{content}" is not a valid document URI; URIs cannot begin or end with slashes.'
19
+ )
20
+
21
+ # Check that the URI doesn't contain a full stop
22
+ if "." in content:
23
+ raise InvalidDocumentURIException(
24
+ f'"{content}" is not a valid document URI; URIs cannot contain full stops.'
25
+ )
26
+
27
+ # If everything is good, return as usual
28
+ return str.__new__(cls, content)
29
+
30
+ def as_marklogic(self) -> MarkLogicDocumentURIString:
31
+ return MarkLogicDocumentURIString(f"/{self}.xml")
@@ -0,0 +1,17 @@
1
+ xquery version "1.0-ml";
2
+
3
+ declare namespace xdmp="http://marklogic.com/xdmp";
4
+ declare variable $identifier_value as xs:string external;
5
+ declare variable $published_only as xs:int? external := 1;
6
+
7
+ let $published_query := if ($published_only) then " AND document_published = 'true'" else ""
8
+ let $query := "SELECT * from compiled_url_slugs WHERE (identifier_value = @value)" || $published_query
9
+
10
+ return xdmp:sql(
11
+ $query,
12
+ "map",
13
+ map:new((
14
+ map:entry("value", $identifier_value)
15
+ ))
16
+ )
17
+
@@ -7,9 +7,9 @@ checks. They are used to enforce appropriately typed variables being passed in t
7
7
  """
8
8
 
9
9
  from typing import Any, NewType, Optional, TypedDict
10
- from caselawclient.models.documents import DocumentURIString
10
+ from caselawclient.types import DocumentURIString
11
+ from caselawclient.types import MarkLogicDocumentURIString as MarkLogicDocumentURIString
11
12
 
12
- MarkLogicDocumentURIString = NewType("MarkLogicDocumentURIString", str)
13
13
  MarkLogicDocumentVersionURIString = NewType("MarkLogicDocumentVersionURIString", MarkLogicDocumentURIString)
14
14
 
15
15
  MarkLogicPrivilegeURIString = NewType("MarkLogicPrivilegeURIString", str)
@@ -141,12 +141,18 @@ class ListJudgmentVersionsDict(MarkLogicAPIDict):
141
141
  uri: MarkLogicDocumentURIString
142
142
 
143
143
 
144
- # resolve_from_identifier.xqy
145
- class ResolveFromIdentifierDict(MarkLogicAPIDict):
144
+ # resolve_from_identifier_slug.xqy
145
+ class ResolveFromIdentifierSlugDict(MarkLogicAPIDict):
146
146
  identifier_uri: DocumentURIString
147
147
  published_only: Optional[int]
148
148
 
149
149
 
150
+ # resolve_from_identifier_value.xqy
151
+ class ResolveFromIdentifierValueDict(MarkLogicAPIDict):
152
+ identifier_value: str
153
+ published_only: Optional[int]
154
+
155
+
150
156
  # set_boolean_property.xqy
151
157
  class SetBooleanPropertyDict(MarkLogicAPIDict):
152
158
  name: str
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ds-caselaw-marklogic-api-client
3
- Version: 29.2.0
3
+ Version: 31.0.0
4
4
  Summary: An API client for interacting with the underlying data in Find Caselaw.
5
5
  Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
6
6
  Keywords: national archives,caselaw
@@ -11,7 +11,7 @@ Classifier: Programming Language :: Python :: 3.9
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
13
  Requires-Dist: boto3 (>=1.26.112,<2.0.0)
14
- Requires-Dist: certifi (>=2024.12.14,<2024.13.0)
14
+ Requires-Dist: certifi (>=2025.1.31,<2025.2.0)
15
15
  Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
16
16
  Requires-Dist: django-environ (>=0.12.0)
17
17
  Requires-Dist: ds-caselaw-utils (>=2.0.0,<3.0.0)
@@ -1,15 +1,15 @@
1
- caselawclient/Client.py,sha256=6BaVpKSvMcNhSaDXzTkN1h4iV7KFrY7SgSHNGqGQ_2o,43863
1
+ caselawclient/Client.py,sha256=JBtcmyuY8tHm1pj62BkmtLSjsZ825ver9sefTLhltIc,45005
2
2
  caselawclient/__init__.py,sha256=DY-caubLDQWWingSdsBWgovDNXh8KcnkI6kwz08eIFk,612
3
3
  caselawclient/client_helpers/__init__.py,sha256=fyDNKCdrTb2N0Ks23YDhmvlXKfLTHnYQCXhnZb-QQbg,3832
4
4
  caselawclient/client_helpers/search_helpers.py,sha256=R99HyRLeYHgsw2L3DOidEqlKLLvs6Tga5rKTuWQViig,1525
5
5
  caselawclient/content_hash.py,sha256=0cPC4OoABq0SC2wYFX9-24DodNigeOqksDxgxQH_hUA,2221
6
6
  caselawclient/errors.py,sha256=JC16fEGq_MRJX-_KFzfINCV2Cqx8o6OWOt3C16rQd84,3142
7
- caselawclient/factories.py,sha256=6-xZMVmvtXA8AnyWJgJTums1EWfM6lPIhrWQu0NopJo,4472
8
- caselawclient/identifier_resolution.py,sha256=IOqrZcIHoHhNOCAkNveOBcWddBNpkOB8cz1r0zFa8mQ,1829
7
+ caselawclient/factories.py,sha256=_ey9KTu393H9q1hWAjr5LIp4oYzDre55QpVMkmxNSb0,6223
8
+ caselawclient/identifier_resolution.py,sha256=pqapUH8oiZF3ie-s_CI0hvZwH__JVcjJ4VxkpBxswmA,2354
9
9
  caselawclient/models/__init__.py,sha256=kd23EUpvaC7aLHdgk8farqKAQEx3lf7RvNT2jEatvlg,68
10
- caselawclient/models/documents/__init__.py,sha256=huZ-aQ-tsfuWScqSPkBAAQ9dhYa98fdd5eHwO85s_BQ,19456
11
- caselawclient/models/documents/body.py,sha256=7Sj6lnddcAM8VAeecAS2VEhQHR9CB3G0fL8TAXz7EXw,5588
12
- caselawclient/models/documents/exceptions.py,sha256=rw1xId16vBKvBImgFmFUpeFgKqU7VTNtVLIEVBPGKyk,374
10
+ caselawclient/models/documents/__init__.py,sha256=LNo9FVKkF9rGzmJ4swEKrKXvimpvenE3OU_Z3Rt6Jic,18230
11
+ caselawclient/models/documents/body.py,sha256=mhPOV1cOF3RJr69UzNPlo1KrzePaj7KDPYi1exP06L0,5880
12
+ caselawclient/models/documents/exceptions.py,sha256=Mz1P8uNqf5w6uLnRwJt6xK7efsVqtd5VA-WXUUH7QLk,285
13
13
  caselawclient/models/documents/statuses.py,sha256=Cp4dTQmJOtsU41EJcxy5dV1841pGD2PNWH0VrkDEv4Q,579
14
14
  caselawclient/models/documents/transforms/html.xsl,sha256=oSSO-IBX4qLiSWexQYmWJfGNevF09aCBx4D1NYqXxpo,38322
15
15
  caselawclient/models/documents/xml.py,sha256=HlmPb63lLMnySSOLP4iexcAyQiLByKBZtTd25f8sY8M,1268
@@ -18,19 +18,20 @@ caselawclient/models/identifiers/fclid.py,sha256=pTO586ra0sr4DbjHSxuI8UlxfNXLm9n
18
18
  caselawclient/models/identifiers/neutral_citation.py,sha256=3Jw1_-NmGfGmrWGFSzLdTHBYHIHq4tPkF8U7Jba-jGo,1848
19
19
  caselawclient/models/identifiers/press_summary_ncn.py,sha256=r55-qgi9LDnGxY8vTKijzotGknA6mNLpu55QQTV8Lxo,652
20
20
  caselawclient/models/identifiers/unpacker.py,sha256=xvp480QESbN36NEc6qeo-orqOBq6WchnLI7thY7A1qs,2156
21
- caselawclient/models/judgments.py,sha256=xFjfOspa9ZL29gvvGVNq11JC7h-LKebFMrQYvVIIoEI,1868
21
+ caselawclient/models/judgments.py,sha256=H_-t4mCa3LdYu1cLOhQB5n045RdJghWpqS5hgPPDE4U,2170
22
22
  caselawclient/models/neutral_citation_mixin.py,sha256=jAac3PPuWyPdj9N-n-U_JfwkbgbSIXaqFVQahfu95do,2086
23
- caselawclient/models/press_summaries.py,sha256=bEqJxu-7eBLhwulOsDXYDl2ptIp3RkkMpOcdVvWB5ds,1836
23
+ caselawclient/models/press_summaries.py,sha256=a50KdX_v41G_eI5h6_HuA2hvrhOA2EDvoTYBi5va1e8,2101
24
24
  caselawclient/models/utilities/__init__.py,sha256=u3yIhbTjFQ1JJyAm5wsMEBswWl4t6Z7UMORF5FqC2xQ,1257
25
- caselawclient/models/utilities/aws.py,sha256=5UJhxdGwYwH0bswi3q9nzmXcioPz9t_lNWYmP45RyK4,8842
25
+ caselawclient/models/utilities/aws.py,sha256=E4nFcNC2xxPUv0Xkfi2XTO3FyIH6jaAy9pgsvR48Eg8,8759
26
26
  caselawclient/models/utilities/dates.py,sha256=WwORxVjUHM1ZFcBF6Qtwo3Cj0sATsnSECkUZ6ls1N1Q,492
27
- caselawclient/models/utilities/move.py,sha256=Rsx1eGHVjbGz0WMVDjy8b_5t4Ig8aP55sLudL07MVUs,3621
27
+ caselawclient/models/utilities/move.py,sha256=MXdUqkSiyqRb8YKs_66B6ICWn8EWM6DiJV95fuJO1Us,3610
28
28
  caselawclient/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
29
  caselawclient/responses/__init__.py,sha256=2-5NJn_PXPTje_W4dHeHYaNRN6vXK4UcB9eLLNUAKa4,67
30
30
  caselawclient/responses/search_response.py,sha256=Z76Zj4VvM-EV_vdiehv2-Jfkr9HZD3SvCTlRrUB_cyE,1951
31
- caselawclient/responses/search_result.py,sha256=2yR3FP4CQsVymE7RrOMbh1owjYaRTrqkjMObbIkSlhE,8216
31
+ caselawclient/responses/search_result.py,sha256=hotJHJ9wQusjb4PjZm1DDZv2HEsXWTtZU5TOTj2T0rw,8205
32
32
  caselawclient/responses/xsl/search_match.xsl,sha256=4Sv--MrwBd7J48E9aI7jlFSXGlNi4dBqgzJ3bdMJ_ZU,1018
33
33
  caselawclient/search_parameters.py,sha256=nR-UC1aWZbdXzXBrVDaHECU4Ro8Zi4JZATtgrpAVsKY,3342
34
+ caselawclient/types.py,sha256=gedGlrO1ZA_wYdCuquwi6seFr5OqTPxi1O0D_lKdEps,1087
34
35
  caselawclient/xml_helpers.py,sha256=FEtE8gxaEZmcgua-Xu8awPmiOm9K58OSabEYVGpiVEY,493
35
36
  caselawclient/xquery/break_judgment_checkout.xqy,sha256=rISzoBKxQKrP5ZRdCSoRqOXW8T_NDBSZRFjOXo_H3ns,220
36
37
  caselawclient/xquery/checkin_judgment.xqy,sha256=QeGqO3kL-q0UrjopCVU0lCbkwbyoc5SuNLYFAIbbyMg,197
@@ -59,7 +60,8 @@ caselawclient/xquery/get_version_annotation.xqy,sha256=pFDMGA9SxI59iUPaoAeUsq23k
59
60
  caselawclient/xquery/get_version_created.xqy,sha256=bRweaXFtwMBNzL16SlOdiOxHkbqNUwpwDHLxpZYVCh0,250
60
61
  caselawclient/xquery/insert_document.xqy,sha256=iP2xTaLGa-u6X9KfS1yJ6yPCKQUWQFYdEW1S4YcMY7w,531
61
62
  caselawclient/xquery/list_judgment_versions.xqy,sha256=WShga8igeD21hSLfVSvCOiDMPDhNH6KGf1OW6G0SAkY,190
62
- caselawclient/xquery/resolve_from_identifier.xqy,sha256=Fa-RSw9ZwD__BmT5LLJ0J0HcDstDbedkEccv45M3L4g,484
63
+ caselawclient/xquery/resolve_from_identifier_slug.xqy,sha256=Fa-RSw9ZwD__BmT5LLJ0J0HcDstDbedkEccv45M3L4g,484
64
+ caselawclient/xquery/resolve_from_identifier_value.xqy,sha256=7uP3DnRi67qSp0aUhW6Cv_GA8BQGw6GuvtAghjrT7Z4,493
63
65
  caselawclient/xquery/set_boolean_property.xqy,sha256=8Vg3yDWqeDynUJQHw2OF4daDIKTnp8ARol1_OCqY0Dk,355
64
66
  caselawclient/xquery/set_metadata_citation.xqy,sha256=ImwijXowvOCiH_br_LepnKsEpys9tg4Cf3uz6MoC5-c,659
65
67
  caselawclient/xquery/set_metadata_court.xqy,sha256=xQGR3e4pdJuDPMlzdAdzrBDSeQbEFiLVIm2z_KQI_Ds,996
@@ -77,8 +79,8 @@ caselawclient/xquery/validate_all_documents.xqy,sha256=z_0YEXmRcZ-FaJM0ouKiTjdI4
77
79
  caselawclient/xquery/validate_document.xqy,sha256=PgaDcnqCRJPIVqfmWsNlXmCLNKd21qkJrvY1RtNP7eA,140
78
80
  caselawclient/xquery/xslt.xqy,sha256=w57wNijH3dkwHkpKeAxqjlghVflQwo8cq6jS_sm-erM,199
79
81
  caselawclient/xquery/xslt_transform.xqy,sha256=smyFFxqmtkuOzBd2l7uw6K2oAsYctudrP8omdv_XNAM,2463
80
- caselawclient/xquery_type_dicts.py,sha256=kybL-YzwK34Fr6MeWfqVOJHYrs0ZNeDWXDsp8o2Yb1U,6114
81
- ds_caselaw_marklogic_api_client-29.2.0.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
82
- ds_caselaw_marklogic_api_client-29.2.0.dist-info/METADATA,sha256=ALbG5q-13rqmY9R-1lUB7LLUnZppeBT4A9xssTV5skM,4264
83
- ds_caselaw_marklogic_api_client-29.2.0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
84
- ds_caselaw_marklogic_api_client-29.2.0.dist-info/RECORD,,
82
+ caselawclient/xquery_type_dicts.py,sha256=IElFK9aFZboNgKytRjK2EQPIJJcXXw9NVfRTWKx2zJo,6283
83
+ ds_caselaw_marklogic_api_client-31.0.0.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
84
+ ds_caselaw_marklogic_api_client-31.0.0.dist-info/METADATA,sha256=QR0bbqSK9ZROZon7RZII--oK8fBuKoMRk94JTOE57Zc,4262
85
+ ds_caselaw_marklogic_api_client-31.0.0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
86
+ ds_caselaw_marklogic_api_client-31.0.0.dist-info/RECORD,,