ds-caselaw-marklogic-api-client 22.0.2__py3-none-any.whl → 23.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
caselawclient/Client.py CHANGED
@@ -647,6 +647,22 @@ class MarklogicApiClient:
647
647
 
648
648
  return self._send_to_eval(vars, "get_judgment_version.xqy")
649
649
 
650
def validate_document(self, document_uri: DocumentURIString) -> bool:
    """
    Check whether a document in MarkLogic passes validation.

    Runs `validate_document.xqy` for the document and parses the first part of the
    multipart response as XML.

    :param document_uri: The URI of the document to validate

    :return: `True` if the parsed response contains no `error:error` elements
        (namespace `http://marklogic.com/xdmp/error`), `False` otherwise
    """
    query_vars: query_dicts.ValidateDocumentDict = {
        "uri": self._format_uri_for_marklogic(document_uri)
    }
    eval_response = self._send_to_eval(query_vars, "validate_document.xqy")
    first_part = decoder.MultipartDecoder.from_response(eval_response).parts[0]
    validation_report = ElementTree.fromstring(first_part.text)

    error_namespaces = {"error": "http://marklogic.com/xdmp/error"}
    # An empty list of matches means no errors were reported at any depth.
    return not validation_report.findall(".//error:error", error_namespaces)
665
+
650
666
  def eval(
651
667
  self, xquery_path: str, vars: str, accept_header: str = "multipart/mixed"
652
668
  ) -> requests.Response:
@@ -22,6 +22,7 @@ from .utilities import VersionsDict, render_versions
22
22
  from .utilities.aws import (
23
23
  ParserInstructionsDict,
24
24
  announce_document_event,
25
+ check_docx_exists,
25
26
  delete_documents_from_private_bucket,
26
27
  generate_docx_url,
27
28
  generate_pdf_url,
@@ -31,6 +32,8 @@ from .utilities.aws import (
31
32
  uri_for_s3,
32
33
  )
33
34
 
35
+ MINIMUM_ENRICHMENT_TIME = datetime.timedelta(minutes=20)
36
+
34
37
 
35
38
  class UnparsableDate(Warning):
36
39
  pass
@@ -165,13 +168,17 @@ class Document:
165
168
  :return: `True` if the document exists, `False` otherwise."""
166
169
  return self.api_client.document_exists(self.uri)
167
170
 
171
def docx_exists(self) -> bool:
    """Report whether a docx for this Document's URI exists in the S3 private bucket."""
    document_uri = self.uri
    return check_docx_exists(document_uri)
174
+
168
175
  @property
169
176
  def best_human_identifier(self) -> Optional[str]:
170
177
  """
171
178
  Some identifier that is understood by legal professionals to refer to this legal event
172
179
  that is not the name of the document.
173
180
  Typically, this will be the neutral citation number, should it exist.
174
- Should typically overridden in subclasses.
181
+ Should typically be overridden in subclasses.
175
182
  """
176
183
  return None
177
184
 
@@ -270,10 +277,12 @@ class Document:
270
277
 
271
278
  @cached_property
272
279
  def transformation_datetime(self) -> Optional[datetime.datetime]:
280
+ """When was this document successfully parsed or reparsed (date from XML)"""
273
281
  return self.get_latest_manifestation_datetime("transform")
274
282
 
275
283
  @cached_property
276
284
  def enrichment_datetime(self) -> Optional[datetime.datetime]:
285
+ """When was this document successfully enriched (date from XML)"""
277
286
  return self.get_latest_manifestation_datetime("tna-enriched")
278
287
 
279
288
  @cached_property
@@ -480,9 +489,9 @@ class Document:
480
489
 
481
490
  return DOCUMENT_STATUS_NEW
482
491
 
483
- def enrich(self) -> None:
492
+ def force_enrich(self) -> None:
484
493
  """
485
- Request enrichment of the document
494
+ Request enrichment of the document, but do no checks
486
495
  """
487
496
  now = datetime.datetime.now(datetime.timezone.utc)
488
497
  self.api_client.set_property(
@@ -495,6 +504,42 @@ class Document:
495
504
  enrich=True,
496
505
  )
497
506
 
507
def enrich(self) -> bool:
    """
    Request enrichment of the document, but only when `can_enrich` allows it.

    :return: `True` if enrichment was requested, `False` if the request was skipped
    """
    if not self.can_enrich:
        return False

    self.force_enrich()
    return True
515
+
516
@cached_property
def can_enrich(self) -> bool:
    """
    Whether it is sensible to enrich this document.

    True only when the document has not been enriched recently and it validates
    against the schema.
    """
    if self.enriched_recently is not False:
        return False

    # Validation is only consulted once the recency check has passed.
    return bool(self.validates_against_schema)
524
+
525
@cached_property
def enriched_recently(self) -> bool:
    """
    Whether the last recorded enrichment was less than `MINIMUM_ENRICHMENT_TIME` ago.

    A document with no recorded enrichment datetime is never treated as recently
    enriched.
    """
    enriched_at = self.enrichment_datetime
    if not enriched_at:
        return False

    elapsed = datetime.datetime.now(tz=datetime.timezone.utc) - enriched_at
    return elapsed < MINIMUM_ENRICHMENT_TIME
535
+
536
@cached_property
def validates_against_schema(self) -> bool:
    """
    Whether the document validates against the schema, as reported by the API
    client's `validate_document` for this document's URI.
    """
    client = self.api_client
    return client.validate_document(self.uri)
542
+
498
543
  def publish(self) -> None:
499
544
  """
500
545
  :raises CannotPublishUnpublishableDocument: This document has not passed the checks in `is_publishable`, and as
@@ -553,7 +598,7 @@ class Document:
553
598
  def move(self, new_citation: str) -> None:
554
599
  self.api_client.update_document_uri(self.uri, new_citation)
555
600
 
556
- def reparse(self) -> None:
601
+ def force_reparse(self) -> None:
557
602
  "Send an SNS notification that triggers reparsing, also sending all editor-modifiable metadata and URI"
558
603
 
559
604
  now = datetime.datetime.now(datetime.timezone.utc)
@@ -589,6 +634,21 @@ class Document:
589
634
  parser_instructions=parser_instructions,
590
635
  )
591
636
 
637
def reparse(self) -> bool:
    """
    Request a reparse of the document, but only when `can_reparse` allows it.

    :return: `True` if a reparse was requested, `False` if the request was skipped
    """
    if not self.can_reparse:
        return False

    self.force_reparse()
    return True
642
+
643
@cached_property
def can_reparse(self) -> bool:
    """
    Whether it is sensible to reparse this document.

    True only when `docx_exists()` reports a docx for the document.
    """
    return bool(self.docx_exists())
651
+
592
652
  class XML:
593
653
  """
594
654
  Represents the XML of a document, and should contain all methods for interacting with it.
@@ -1,8 +1,13 @@
1
+ import importlib
1
2
  from functools import cached_property
2
- from typing import Any
3
+ from typing import TYPE_CHECKING, Any, Optional
3
4
 
5
+ from caselawclient.errors import DocumentNotFoundError
4
6
  from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
5
7
 
8
+ if TYPE_CHECKING:
9
+ from caselawclient.models.press_summaries import PressSummary
10
+
6
11
  from ..xml_helpers import get_xpath_match_string
7
12
  from .documents import Document
8
13
 
@@ -32,3 +37,18 @@ class Judgment(NeutralCitationMixin, Document):
32
37
  @property
33
38
  def best_human_identifier(self) -> str:
34
39
  return self.neutral_citation
40
+
41
@cached_property
def linked_document(self) -> Optional["PressSummary"]:
    """
    Attempt to fetch the press summary linked to this judgment.

    The press summary is looked up at this judgment's URI with `/press-summary/1`
    appended.

    :return: The linked `PressSummary`, or `None` if constructing it raises
        `DocumentNotFoundError`
    """
    press_summary_uri = self.uri + "/press-summary/1"

    # The class is resolved at call time rather than imported at module level
    # (presumably to avoid a circular import between the two model modules).
    press_summaries_module = importlib.import_module(
        "caselawclient.models.press_summaries"
    )
    press_summary_class = getattr(press_summaries_module, "PressSummary")

    try:
        return press_summary_class(press_summary_uri, self.api_client)  # type: ignore
    except DocumentNotFoundError:
        return None
@@ -1,11 +1,18 @@
1
+ from __future__ import annotations
2
+
3
+ import importlib
1
4
  from functools import cached_property
2
- from typing import Any
5
+ from typing import TYPE_CHECKING, Any, Optional
3
6
 
7
+ from caselawclient.errors import DocumentNotFoundError
4
8
  from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin
5
9
  from caselawclient.xml_helpers import get_xpath_match_string
6
10
 
7
11
  from .documents import Document
8
12
 
13
+ if TYPE_CHECKING:
14
+ from caselawclient.models.judgments import Judgment
15
+
9
16
 
10
17
  class PressSummary(NeutralCitationMixin, Document):
11
18
  """
@@ -31,3 +38,18 @@ class PressSummary(NeutralCitationMixin, Document):
31
38
  @property
32
39
  def best_human_identifier(self) -> str:
33
40
  return self.neutral_citation
41
+
42
@cached_property
def linked_document(self) -> Optional["Judgment"]:
    """
    Attempt to fetch the judgment linked to this press summary.

    The judgment is looked up at this press summary's URI with a trailing
    `/press-summary/1` removed (the URI is used unchanged if it has no such suffix).

    :return: The linked `Judgment`, or `None` if constructing it raises
        `DocumentNotFoundError`
    """
    judgment_uri = self.uri.removesuffix("/press-summary/1")

    # The class is resolved at call time rather than imported at module level
    # (presumably to avoid a circular import between the two model modules).
    judgments_module = importlib.import_module("caselawclient.models.judgments")
    judgment_class = getattr(judgments_module, "Judgment")

    try:
        return judgment_class(judgment_uri, self.api_client)  # type: ignore
    except DocumentNotFoundError:
        return None
@@ -78,6 +78,20 @@ def generate_signed_asset_url(key: str) -> str:
78
78
  )
79
79
 
80
80
 
81
def check_docx_exists(uri: str) -> bool:
    """
    Check whether the docx for a document URI actually exists in S3.

    Issues a `head_object` request against the bucket named by the
    `PRIVATE_ASSET_BUCKET` environment setting, using the key from `generate_docx_key`.

    :param uri: The document URI whose docx should be looked up

    :return: `True` if the request succeeds, `False` if S3 answers with a 404 error code

    :raises botocore.exceptions.ClientError: For any client error other than a 404
    """
    bucket_name = env("PRIVATE_ASSET_BUCKET", None)
    docx_key = generate_docx_key(uri)
    s3_client = create_s3_client()

    try:
        s3_client.head_object(Bucket=bucket_name, Key=docx_key)
    except botocore.exceptions.ClientError as error:
        # Only "not found" means the docx is absent; anything else is a real failure.
        if error.response["Error"]["Code"] != "404":
            raise
        return False

    return True
93
+
94
+
81
95
  def generate_docx_key(uri: str) -> str:
82
96
  """from a canonical caselaw URI (eat/2022/1) return the S3 key of the associated docx"""
83
97
  return f'{uri}/{uri.replace("/", "_")}.docx'
@@ -227,7 +241,7 @@ def request_parse(
227
241
  .replace("+00:00", "Z"),
228
242
  "function": "fcl-judgment-parse-request",
229
243
  "producer": "FCL",
230
- "executionId": f"fcl_ex_id_{uuid.uuid4()}",
244
+ "executionId": str(uuid.uuid4()),
231
245
  "parentExecutionId": None,
232
246
  },
233
247
  "parameters": {
@@ -45,3 +45,20 @@ class SearchResponse:
45
45
  "//search:response/search:result", namespaces=self.NAMESPACES
46
46
  )
47
47
  return [SearchResult(result, self.client) for result in results]
48
+
49
@property
def facets(self) -> dict[str, str]:
    """
    Returns search facets from the SearchResponse as a dictionary

    Each `search:facet-value` element contributes one entry, keyed by its `name`
    attribute with its `count` attribute as the value. Values from all facets share
    one dictionary, so a repeated name keeps only the last count seen.

    :return: A flattened dictionary of search facet values
    """
    # TODO: preserve the name of the facet (e.g. "court", "year")
    search_namespaces = {"search": "http://marklogic.com/appservices/search"}
    facet_values = self.node.xpath(
        "//search:response/search:facet/search:facet-value",
        namespaces=search_namespaces,
    )

    flattened: dict[str, str] = {}
    for facet_value in facet_values:
        name = facet_value.attrib["name"]
        flattened[name] = facet_value.attrib["count"]
    return flattened
@@ -0,0 +1,7 @@
1
xquery version "1.0-ml";

(: Validate a single document and return the output of xdmp:validate. :)

(: URI of the document to validate, supplied externally by the caller. :)
declare variable $uri as xs:string external;

let $judgment := fn:document($uri)

return xdmp:validate($judgment)
@@ -211,6 +211,11 @@ class UserHasRoleDict(MarkLogicAPIDict):
211
211
  user: str
212
212
 
213
213
 
214
# validate_document.xqy
class ValidateDocumentDict(MarkLogicAPIDict):
    """Variables passed to `validate_document.xqy`."""

    # URI of the document to validate, as a MarkLogic document URI string
    uri: MarkLogicDocumentURIString
217
+
218
+
214
219
  # xslt.xqy
215
220
  class XsltDict(MarkLogicAPIDict):
216
221
  uri: MarkLogicDocumentURIString
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ds-caselaw-marklogic-api-client
3
- Version: 22.0.2
3
+ Version: 23.0.1
4
4
  Summary: An API client for interacting with the underlying data in Find Caselaw.
5
5
  Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
6
6
  Keywords: national archives,caselaw
@@ -1,20 +1,20 @@
1
- caselawclient/Client.py,sha256=wjB-gPOKq1SzyNpX98Do23MMaBb026oaYcqirxbyPrQ,38739
1
+ caselawclient/Client.py,sha256=hFCJtj2eD5k1nazagmqsYRAkhGSLKW_a92zlYni6VC8,39341
2
2
  caselawclient/__init__.py,sha256=DY-caubLDQWWingSdsBWgovDNXh8KcnkI6kwz08eIFk,612
3
3
  caselawclient/client_helpers/__init__.py,sha256=6vUjIwi777iaNDBUYwWmpzgAXeFHeXnmmMBniVmjUP8,3830
4
4
  caselawclient/client_helpers/search_helpers.py,sha256=DYgUltPq8fFI2KkLRqH1-8zpbb8_swBFyBvvgBbinig,1514
5
5
  caselawclient/content_hash.py,sha256=DF7ujrQPNf1bTSbK0mIIaC5qx6CmF5I0xlQ7uIG0zYI,2236
6
6
  caselawclient/errors.py,sha256=3rsbOQ11hIhm7-UABcHNMcs9XgcrIzytAP0koyZBLWM,3195
7
7
  caselawclient/models/__init__.py,sha256=kd23EUpvaC7aLHdgk8farqKAQEx3lf7RvNT2jEatvlg,68
8
- caselawclient/models/documents.py,sha256=NZautWi38T-hlhyyotiHr31hafFL4obCSHc7NuZW_e4,20658
9
- caselawclient/models/judgments.py,sha256=TcAsn27K--QQAfaaUZ8biybB9OeVS__91FRlwaG16HY,1020
8
+ caselawclient/models/documents.py,sha256=olPW1m6yNRZurlxFWbl-SgPGFPHn0wz_SuUAfeaSt0w,22502
9
+ caselawclient/models/judgments.py,sha256=zufIORXux0OaOWs2bbVSd9xR-HSwrMJe5WAhHwJzzwM,1740
10
10
  caselawclient/models/neutral_citation_mixin.py,sha256=G9QS5XZ0tf_VXTxt4Uryy_gZ4eBDsmChqEClLAntIwI,1810
11
- caselawclient/models/press_summaries.py,sha256=5c1jpVhVtmIMN8AeHMywGXvz4H55kKAIUaaaVims6Tw,994
11
+ caselawclient/models/press_summaries.py,sha256=2yx7xOqimjM8NsmueD0Ml3aev2fML3U-UksCRJRqPIM,1726
12
12
  caselawclient/models/utilities/__init__.py,sha256=aL1a2nDacPxninETeaVZKwOxZemgvm73IcpWgMNXoGc,1100
13
- caselawclient/models/utilities/aws.py,sha256=6J6dOhyNzSHkIpAFseJDDNBQTXkTs63qS1W-oXZaqG8,7648
13
+ caselawclient/models/utilities/aws.py,sha256=Mw244oUPeWB3p6Q7UYRKoy3aS0lE9E6lToE4MSVC5mw,8075
14
14
  caselawclient/models/utilities/move.py,sha256=_SKzO1UVXHFIVvWfT4nuCwdov7acp8tYzzEg-vVfUyg,5372
15
15
  caselawclient/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  caselawclient/responses/__init__.py,sha256=2-5NJn_PXPTje_W4dHeHYaNRN6vXK4UcB9eLLNUAKa4,67
17
- caselawclient/responses/search_response.py,sha256=OlzXOEnCg-4HdfOYfeIMdk-UfROOI_Nz-etfznFffok,1335
17
+ caselawclient/responses/search_response.py,sha256=zXc_OSyGfoh_C1NHBEzjhiGj1tUIPaqgRUh7iA9EdY4,1959
18
18
  caselawclient/responses/search_result.py,sha256=BSVE2zBy-sy93fghd_kqwkhgwcnx1mFHY1_HTj-qO70,8096
19
19
  caselawclient/responses/xsl/search_match.xsl,sha256=4Sv--MrwBd7J48E9aI7jlFSXGlNi4dBqgzJ3bdMJ_ZU,1018
20
20
  caselawclient/search_parameters.py,sha256=nR-UC1aWZbdXzXBrVDaHECU4Ro8Zi4JZATtgrpAVsKY,3342
@@ -56,10 +56,11 @@ caselawclient/xquery/update_locked_judgment.xqy,sha256=wFDEtKh7MxvPmEHrzOHTJIRlY
56
56
  caselawclient/xquery/user_has_privilege.xqy,sha256=TfqPjhdpXt-4Fo7E2kEYRhfqQm6uES-IuLbAlCxylbg,371
57
57
  caselawclient/xquery/user_has_role.xqy,sha256=52YuFZnXqaDDJs-j_UanpqcLNEiw_m9xb07HvZVUoXY,246
58
58
  caselawclient/xquery/validate_all_documents.xqy,sha256=z_0YEXmRcZ-FaJM0ouKiTjdI4tLKQ4FTssRihR07qFk,156
59
+ caselawclient/xquery/validate_document.xqy,sha256=PgaDcnqCRJPIVqfmWsNlXmCLNKd21qkJrvY1RtNP7eA,140
59
60
  caselawclient/xquery/xslt.xqy,sha256=w57wNijH3dkwHkpKeAxqjlghVflQwo8cq6jS_sm-erM,199
60
61
  caselawclient/xquery/xslt_transform.xqy,sha256=smyFFxqmtkuOzBd2l7uw6K2oAsYctudrP8omdv_XNAM,2463
61
- caselawclient/xquery_type_dicts.py,sha256=feXVFODZ8-SKpTrsvySCh1KLgNri4GDwDT75tczlTg0,5482
62
- ds_caselaw_marklogic_api_client-22.0.2.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
63
- ds_caselaw_marklogic_api_client-22.0.2.dist-info/METADATA,sha256=KhlOXn0PRE84dVJ7ILOB-hYPAXg9EQ-eaHaH_zAPwC0,4006
64
- ds_caselaw_marklogic_api_client-22.0.2.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
65
- ds_caselaw_marklogic_api_client-22.0.2.dist-info/RECORD,,
62
+ caselawclient/xquery_type_dicts.py,sha256=YOrXbEYJU84S-YwergCI12OL5Wrn_wpqMcqWpsQrKek,5590
63
+ ds_caselaw_marklogic_api_client-23.0.1.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
64
+ ds_caselaw_marklogic_api_client-23.0.1.dist-info/METADATA,sha256=Q1Rq_cBlSoVl5eVGKrAPYRbQ4qfKD7idunQFOaIV33s,4006
65
+ ds_caselaw_marklogic_api_client-23.0.1.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
66
+ ds_caselaw_marklogic_api_client-23.0.1.dist-info/RECORD,,