ds-caselaw-marklogic-api-client 39.0.0__py3-none-any.whl → 39.1.0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ds-caselaw-marklogic-api-client might be problematic. Click here for more details.

caselawclient/Client.py CHANGED
@@ -7,11 +7,12 @@ import warnings
7
7
  from datetime import datetime, time, timedelta
8
8
  from pathlib import Path
9
9
  from typing import Any, Optional, Type, Union
10
- from xml.etree import ElementTree
11
- from xml.etree.ElementTree import Element, ParseError, fromstring
10
+ from xml.etree.ElementTree import Element
12
11
 
13
12
  import environ
14
13
  import requests
14
+ from defusedxml import ElementTree
15
+ from defusedxml.ElementTree import ParseError, fromstring
15
16
  from ds_caselaw_utils.types import NeutralCitationString
16
17
  from lxml import etree
17
18
  from requests.auth import HTTPBasicAuth
@@ -258,10 +259,12 @@ class MarklogicApiClient:
258
259
  return "Unknown error, Marklogic returned a null or empty response"
259
260
  try:
260
261
  xml = fromstring(content_as_xml)
261
- return xml.find(
262
- "message-code",
263
- namespaces={"": "http://marklogic.com/xdmp/error"},
264
- ).text # type: ignore
262
+ return str(
263
+ xml.find(
264
+ "message-code",
265
+ namespaces={"": "http://marklogic.com/xdmp/error"},
266
+ ).text
267
+ )
265
268
  except (ParseError, TypeError, AttributeError):
266
269
  return "Unknown error, Marklogic returned a null or empty response"
267
270
 
@@ -682,10 +685,12 @@ class MarklogicApiClient:
682
685
  if content == "":
683
686
  return None
684
687
  response_xml = ElementTree.fromstring(content)
685
- return response_xml.find(
686
- "dls:annotation",
687
- namespaces={"dls": "http://marklogic.com/xdmp/dls"},
688
- ).text # type: ignore
688
+ return str(
689
+ response_xml.find(
690
+ "dls:annotation",
691
+ namespaces={"dls": "http://marklogic.com/xdmp/dls"},
692
+ ).text
693
+ )
689
694
 
690
695
  def get_judgment_version(
691
696
  self,
@@ -63,6 +63,9 @@ class Document:
63
63
  document_noun_plural = "documents"
64
64
  """ The noun for a plural of this document type. """
65
65
 
66
+ _default_reparse_document_type: Optional[str] = None
67
+ """ The default noun to pass to the parser when reparsing given the document type if known. This is used to determine how the document should be parsed and processed."""
68
+
66
69
  type_collection_name: str
67
70
 
68
71
  attributes_to_validate: list[tuple[str, bool, str]] = [
@@ -465,7 +468,6 @@ class Document:
465
468
  now = datetime.datetime.now(datetime.timezone.utc)
466
469
  self.api_client.set_property(self.uri, "last_sent_to_parser", now.isoformat())
467
470
 
468
- parser_type_noun = {"judgment": "judgment", "press summary": "pressSummary"}[self.document_noun]
469
471
  checked_date: Optional[str] = (
470
472
  self.body.document_date_as_date.isoformat()
471
473
  if self.body.document_date_as_date and self.body.document_date_as_date > datetime.date(1001, 1, 1)
@@ -477,16 +479,18 @@ class Document:
477
479
  # values are "" from the API, we should pass None instead in this case.
478
480
 
479
481
  parser_instructions: ParserInstructionsDict = {
480
- "documentType": parser_type_noun,
481
482
  "metadata": {
482
483
  "name": self.body.name or None,
483
484
  "cite": None,
484
485
  "court": self.body.court or None,
485
486
  "date": checked_date,
486
487
  "uri": self.uri,
487
- },
488
+ }
488
489
  }
489
490
 
491
+ if self._default_reparse_document_type:
492
+ parser_instructions["documentType"] = self._default_reparse_document_type
493
+
490
494
  ## TODO: Remove this hack around the fact that NCNs are assumed to be present for all documents' metadata, but actually different document classes may have different metadata
491
495
  if hasattr(self, "neutral_citation"):
492
496
  parser_instructions["metadata"]["cite"] = self.neutral_citation
@@ -24,6 +24,7 @@ class Judgment(NeutralCitationMixin, Document):
24
24
  document_noun = "judgment"
25
25
  document_noun_plural = "judgments"
26
26
  type_collection_name = "judgment"
27
+ _default_reparse_document_type = "judgment"
27
28
 
28
29
  def __init__(self, uri: DocumentURIString, *args: Any, **kwargs: Any) -> None:
29
30
  super().__init__(self.document_noun, uri, *args, **kwargs)
@@ -11,3 +11,4 @@ class ParserLog(Document):
11
11
  document_noun = "parser log"
12
12
  document_noun_plural = "parser logs"
13
13
  type_collection_name = "parser-log"
14
+ _default_reparse_document_type = "parserLog"
@@ -25,6 +25,7 @@ class PressSummary(NeutralCitationMixin, Document):
25
25
  document_noun = "press summary"
26
26
  document_noun_plural = "press summaries"
27
27
  type_collection_name = "press-summary"
28
+ _default_reparse_document_type = "pressSummary"
28
29
 
29
30
  def __init__(self, uri: DocumentURIString, *args: Any, **kwargs: Any) -> None:
30
31
  super().__init__(self.document_noun, uri, *args, **kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ds-caselaw-marklogic-api-client
3
- Version: 39.0.0
3
+ Version: 39.1.0
4
4
  Summary: An API client for interacting with the underlying data in Find Caselaw.
5
5
  Keywords: national archives,caselaw
6
6
  Author: The National Archives
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.13
13
13
  Requires-Dist: boto3 (>=1.26.112,<2.0.0)
14
14
  Requires-Dist: certifi (>=2025.7.9,<2025.8.0)
15
15
  Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
16
+ Requires-Dist: defusedxml (>=0.7.1,<0.8.0)
16
17
  Requires-Dist: django-environ (>=0.12.0)
17
18
  Requires-Dist: ds-caselaw-utils (>=2.0.0,<3.0.0)
18
19
  Requires-Dist: idna (>=3.4,<4.0)
@@ -1,4 +1,4 @@
1
- caselawclient/Client.py,sha256=AGF7ud4x2hZFm6H--IUXa-n5woOWYmWz2yDzbPPCGdA,45193
1
+ caselawclient/Client.py,sha256=A007KPqb-sn1pUMMVnKQuUfwCSPB8yipVxKhaWECsLY,45282
2
2
  caselawclient/__init__.py,sha256=DY-caubLDQWWingSdsBWgovDNXh8KcnkI6kwz08eIFk,612
3
3
  caselawclient/client_helpers/__init__.py,sha256=eucyUXwUqI72TPw-C5zLcHlMu4GtFY507a6lQc03lQY,5053
4
4
  caselawclient/client_helpers/search_helpers.py,sha256=R99HyRLeYHgsw2L3DOidEqlKLLvs6Tga5rKTuWQViig,1525
@@ -7,7 +7,7 @@ caselawclient/errors.py,sha256=JC16fEGq_MRJX-_KFzfINCV2Cqx8o6OWOt3C16rQd84,3142
7
7
  caselawclient/factories.py,sha256=eGj9TiZpmF3todW-08Ps7bHNMvByHqwEbgujRhvU_Yc,7382
8
8
  caselawclient/identifier_resolution.py,sha256=B5I1sD7o7YjzsXMECjbKjgiGLDda5bGhejsJ-lYpTIg,2429
9
9
  caselawclient/models/__init__.py,sha256=kd23EUpvaC7aLHdgk8farqKAQEx3lf7RvNT2jEatvlg,68
10
- caselawclient/models/documents/__init__.py,sha256=0MYorde8Ku9qUWLa5G_aQUBKwDeTRZ_1ebsV9RxIBuI,21256
10
+ caselawclient/models/documents/__init__.py,sha256=MTSXGaHqsM2xuwgCuBPOskk0B_MPwd54oBZcVfPIA1A,21471
11
11
  caselawclient/models/documents/body.py,sha256=O1ZTV3KHo-YNi7Syd4oCV1CVSuRF7mcLXojwshyY4jg,6601
12
12
  caselawclient/models/documents/exceptions.py,sha256=te7PPQTDHjZ9EYVg5pVaiZfF00lMBFy333PHj8_mkC4,443
13
13
  caselawclient/models/documents/statuses.py,sha256=Cp4dTQmJOtsU41EJcxy5dV1841pGD2PNWH0VrkDEv4Q,579
@@ -20,10 +20,10 @@ caselawclient/models/identifiers/fclid.py,sha256=hj8z-VhXFrUHKOY6k_ItPvOakIvbhJ5
20
20
  caselawclient/models/identifiers/neutral_citation.py,sha256=bYAeXHVm_ls0aDTeYI4uv35iZmJGSKU4-H-iLh2xED0,2912
21
21
  caselawclient/models/identifiers/press_summary_ncn.py,sha256=t-x6PsEe2tz1uO1qZKXKK0TugkQYb_49O_xgjd_oiE4,801
22
22
  caselawclient/models/identifiers/unpacker.py,sha256=OpFBw1B6pqSuzcyHbnTY3dScHc2Ujt5StGRnh-tKE1Q,2592
23
- caselawclient/models/judgments.py,sha256=r40irgdEID-NeSNLm3OUdUBznMpRSwjD2SJrGlBgP8o,2208
23
+ caselawclient/models/judgments.py,sha256=esg017Z2vAevk17jCyGhr8n9fa4FEoMz2SDFgeisV1Y,2256
24
24
  caselawclient/models/neutral_citation_mixin.py,sha256=jAac3PPuWyPdj9N-n-U_JfwkbgbSIXaqFVQahfu95do,2086
25
- caselawclient/models/parser_logs.py,sha256=30kF4w0GcowiMIFtymUkl7ZARanNh_PjDpJZezn-cA8,315
26
- caselawclient/models/press_summaries.py,sha256=PIq9RceZ7n7Z079tESfxhQbfxCmtTc2V2OeFtcn594s,2144
25
+ caselawclient/models/parser_logs.py,sha256=iOhKTAAi87XQvxz1DHjF2lrqScD19g_c8EjSf0vPdfs,364
26
+ caselawclient/models/press_summaries.py,sha256=rtrYs_3BazUXxdA2oYmIJ6YIAiVlKeyc1aSF9uvkJJU,2196
27
27
  caselawclient/models/utilities/__init__.py,sha256=LPhyrQwLKc5tIJUO8Bysn9wCiR6Z6jMMTksjOV4JH9U,1041
28
28
  caselawclient/models/utilities/aws.py,sha256=NTF2W2aNgbO72e5WklXZC2U2_GPbVeynjTS1Nqu6DcE,8561
29
29
  caselawclient/models/utilities/dates.py,sha256=WwORxVjUHM1ZFcBF6Qtwo3Cj0sATsnSECkUZ6ls1N1Q,492
@@ -86,7 +86,7 @@ caselawclient/xquery/xslt_transform.xqy,sha256=cccaFiGkCcvSfDv007UriZ3I4ak2nTLP1
86
86
  caselawclient/xquery_type_dicts.py,sha256=zuyDGTkcN6voOXCm3APXItZ-Ey6tZ2hdZummZWzjl50,6489
87
87
  caselawclient/xslt/modify_xml_live.xsl,sha256=gNjwBun2-UzOeeuf0wNjFtN3jXm1yrwqv_KT8r1slXw,2370
88
88
  caselawclient/xslt/sample.xsl,sha256=IG-v77stjwqiw25pguh391K-5DTKiX651WqILDZixm0,825
89
- ds_caselaw_marklogic_api_client-39.0.0.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
90
- ds_caselaw_marklogic_api_client-39.0.0.dist-info/METADATA,sha256=hn5g5HHm3R3WIpBTeSKsv0IY6uVxKY6m7AkCo-pr_y4,4319
91
- ds_caselaw_marklogic_api_client-39.0.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
92
- ds_caselaw_marklogic_api_client-39.0.0.dist-info/RECORD,,
89
+ ds_caselaw_marklogic_api_client-39.1.0.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
90
+ ds_caselaw_marklogic_api_client-39.1.0.dist-info/METADATA,sha256=y6C7CqUYSY2LHCM5RSuzZP6z2sZjRbkg6B1bqhLLJA8,4362
91
+ ds_caselaw_marklogic_api_client-39.1.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
92
+ ds_caselaw_marklogic_api_client-39.1.0.dist-info/RECORD,,