ds-caselaw-marklogic-api-client 22.1.0__py3-none-any.whl → 23.0.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
@@ -22,6 +22,7 @@ from .utilities import VersionsDict, render_versions
22
22
  from .utilities.aws import (
23
23
  ParserInstructionsDict,
24
24
  announce_document_event,
25
+ check_docx_exists,
25
26
  delete_documents_from_private_bucket,
26
27
  generate_docx_url,
27
28
  generate_pdf_url,
@@ -167,13 +168,17 @@ class Document:
167
168
  :return: `True` if the document exists, `False` otherwise."""
168
169
  return self.api_client.document_exists(self.uri)
169
170
 
171
+ def docx_exists(self) -> bool:
172
+ """There is a docx in S3 private bucket for this Document"""
173
+ return check_docx_exists(self.uri)
174
+
170
175
  @property
171
176
  def best_human_identifier(self) -> Optional[str]:
172
177
  """
173
178
  Some identifier that is understood by legal professionals to refer to this legal event
174
179
  that is not the name of the document.
175
180
  Typically, this will be the neutral citation number, should it exist.
176
- Should typically overridden in subclasses.
181
+ Should typically be overridden in subclasses.
177
182
  """
178
183
  return None
179
184
 
@@ -272,10 +277,12 @@ class Document:
272
277
 
273
278
  @cached_property
274
279
  def transformation_datetime(self) -> Optional[datetime.datetime]:
280
+ """When was this document successfully parsed or reparsed (date from XML)"""
275
281
  return self.get_latest_manifestation_datetime("transform")
276
282
 
277
283
  @cached_property
278
284
  def enrichment_datetime(self) -> Optional[datetime.datetime]:
285
+ """When was this document successfully enriched (date from XML)"""
279
286
  return self.get_latest_manifestation_datetime("tna-enriched")
280
287
 
281
288
  @cached_property
@@ -591,7 +598,7 @@ class Document:
591
598
  def move(self, new_citation: str) -> None:
592
599
  self.api_client.update_document_uri(self.uri, new_citation)
593
600
 
594
- def reparse(self) -> None:
601
+ def force_reparse(self) -> None:
595
602
  "Send an SNS notification that triggers reparsing, also sending all editor-modifiable metadata and URI"
596
603
 
597
604
  now = datetime.datetime.now(datetime.timezone.utc)
@@ -627,6 +634,21 @@ class Document:
627
634
  parser_instructions=parser_instructions,
628
635
  )
629
636
 
637
+ def reparse(self) -> bool:
638
+ if self.can_reparse:
639
+ self.force_reparse()
640
+ return True
641
+ return False
642
+
643
+ @cached_property
644
+ def can_reparse(self) -> bool:
645
+ """
646
+ Is it sensible to reparse this document?
647
+ """
648
+ if self.docx_exists():
649
+ return True
650
+ return False
651
+
630
652
  class XML:
631
653
  """
632
654
  Represents the XML of a document, and should contain all methods for interacting with it.
@@ -78,6 +78,20 @@ def generate_signed_asset_url(key: str) -> str:
78
78
  )
79
79
 
80
80
 
81
+ def check_docx_exists(uri: str) -> bool:
82
+ """Does the docx for a document URI actually exist?"""
83
+ bucket = env("PRIVATE_ASSET_BUCKET", None)
84
+ s3_key = generate_docx_key(uri)
85
+ client = create_s3_client()
86
+ try:
87
+ client.head_object(Bucket=bucket, Key=s3_key)
88
+ return True
89
+ except botocore.exceptions.ClientError as e:
90
+ if e.response["Error"]["Code"] == "404":
91
+ return False
92
+ raise
93
+
94
+
81
95
  def generate_docx_key(uri: str) -> str:
82
96
  """from a canonical caselaw URI (eat/2022/1) return the S3 key of the associated docx"""
83
97
  return f'{uri}/{uri.replace("/", "_")}.docx'
@@ -227,7 +241,7 @@ def request_parse(
227
241
  .replace("+00:00", "Z"),
228
242
  "function": "fcl-judgment-parse-request",
229
243
  "producer": "FCL",
230
- "executionId": f"fcl_ex_id_{uuid.uuid4()}",
244
+ "executionId": str(uuid.uuid4()),
231
245
  "parentExecutionId": None,
232
246
  },
233
247
  "parameters": {
@@ -45,3 +45,20 @@ class SearchResponse:
45
45
  "//search:response/search:result", namespaces=self.NAMESPACES
46
46
  )
47
47
  return [SearchResult(result, self.client) for result in results]
48
+
49
+ @property
50
+ def facets(self) -> dict[str, str]:
51
+ """
52
+ Returns search facets from the SearchResponse as a dictionary
53
+
54
+ :return: A flattened dictionary of search facet values
55
+ """
56
+ # TODO: preserve the name of the facet (e.g. "court", "year")
57
+ results = self.node.xpath(
58
+ "//search:response/search:facet/search:facet-value",
59
+ namespaces={"search": "http://marklogic.com/appservices/search"},
60
+ )
61
+ facets_dictionary = {
62
+ result.attrib["name"]: result.attrib["count"] for result in results
63
+ }
64
+ return facets_dictionary
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ds-caselaw-marklogic-api-client
3
- Version: 22.1.0
3
+ Version: 23.0.1
4
4
  Summary: An API client for interacting with the underlying data in Find Caselaw.
5
5
  Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
6
6
  Keywords: national archives,caselaw
@@ -5,16 +5,16 @@ caselawclient/client_helpers/search_helpers.py,sha256=DYgUltPq8fFI2KkLRqH1-8zpbb
5
5
  caselawclient/content_hash.py,sha256=DF7ujrQPNf1bTSbK0mIIaC5qx6CmF5I0xlQ7uIG0zYI,2236
6
6
  caselawclient/errors.py,sha256=3rsbOQ11hIhm7-UABcHNMcs9XgcrIzytAP0koyZBLWM,3195
7
7
  caselawclient/models/__init__.py,sha256=kd23EUpvaC7aLHdgk8farqKAQEx3lf7RvNT2jEatvlg,68
8
- caselawclient/models/documents.py,sha256=JXv0AFaFgjd5Mg5u_FUysJ7aNHcEhspK5aRSxcy8SkI,21817
8
+ caselawclient/models/documents.py,sha256=olPW1m6yNRZurlxFWbl-SgPGFPHn0wz_SuUAfeaSt0w,22502
9
9
  caselawclient/models/judgments.py,sha256=zufIORXux0OaOWs2bbVSd9xR-HSwrMJe5WAhHwJzzwM,1740
10
10
  caselawclient/models/neutral_citation_mixin.py,sha256=G9QS5XZ0tf_VXTxt4Uryy_gZ4eBDsmChqEClLAntIwI,1810
11
11
  caselawclient/models/press_summaries.py,sha256=2yx7xOqimjM8NsmueD0Ml3aev2fML3U-UksCRJRqPIM,1726
12
12
  caselawclient/models/utilities/__init__.py,sha256=aL1a2nDacPxninETeaVZKwOxZemgvm73IcpWgMNXoGc,1100
13
- caselawclient/models/utilities/aws.py,sha256=6J6dOhyNzSHkIpAFseJDDNBQTXkTs63qS1W-oXZaqG8,7648
13
+ caselawclient/models/utilities/aws.py,sha256=Mw244oUPeWB3p6Q7UYRKoy3aS0lE9E6lToE4MSVC5mw,8075
14
14
  caselawclient/models/utilities/move.py,sha256=_SKzO1UVXHFIVvWfT4nuCwdov7acp8tYzzEg-vVfUyg,5372
15
15
  caselawclient/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  caselawclient/responses/__init__.py,sha256=2-5NJn_PXPTje_W4dHeHYaNRN6vXK4UcB9eLLNUAKa4,67
17
- caselawclient/responses/search_response.py,sha256=OlzXOEnCg-4HdfOYfeIMdk-UfROOI_Nz-etfznFffok,1335
17
+ caselawclient/responses/search_response.py,sha256=zXc_OSyGfoh_C1NHBEzjhiGj1tUIPaqgRUh7iA9EdY4,1959
18
18
  caselawclient/responses/search_result.py,sha256=BSVE2zBy-sy93fghd_kqwkhgwcnx1mFHY1_HTj-qO70,8096
19
19
  caselawclient/responses/xsl/search_match.xsl,sha256=4Sv--MrwBd7J48E9aI7jlFSXGlNi4dBqgzJ3bdMJ_ZU,1018
20
20
  caselawclient/search_parameters.py,sha256=nR-UC1aWZbdXzXBrVDaHECU4Ro8Zi4JZATtgrpAVsKY,3342
@@ -60,7 +60,7 @@ caselawclient/xquery/validate_document.xqy,sha256=PgaDcnqCRJPIVqfmWsNlXmCLNKd21q
60
60
  caselawclient/xquery/xslt.xqy,sha256=w57wNijH3dkwHkpKeAxqjlghVflQwo8cq6jS_sm-erM,199
61
61
  caselawclient/xquery/xslt_transform.xqy,sha256=smyFFxqmtkuOzBd2l7uw6K2oAsYctudrP8omdv_XNAM,2463
62
62
  caselawclient/xquery_type_dicts.py,sha256=YOrXbEYJU84S-YwergCI12OL5Wrn_wpqMcqWpsQrKek,5590
63
- ds_caselaw_marklogic_api_client-22.1.0.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
64
- ds_caselaw_marklogic_api_client-22.1.0.dist-info/METADATA,sha256=nGmZ7x-szFamzWs1pCV9KIT64fmz049BueAzq8BmupQ,4006
65
- ds_caselaw_marklogic_api_client-22.1.0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
66
- ds_caselaw_marklogic_api_client-22.1.0.dist-info/RECORD,,
63
+ ds_caselaw_marklogic_api_client-23.0.1.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
64
+ ds_caselaw_marklogic_api_client-23.0.1.dist-info/METADATA,sha256=Q1Rq_cBlSoVl5eVGKrAPYRbQ4qfKD7idunQFOaIV33s,4006
65
+ ds_caselaw_marklogic_api_client-23.0.1.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
66
+ ds_caselaw_marklogic_api_client-23.0.1.dist-info/RECORD,,