ds-caselaw-marklogic-api-client 17.2.0__py3-none-any.whl → 18.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- caselawclient/Client.py +0 -14
- caselawclient/__init__.py +0 -20
- caselawclient/client_helpers/search_helpers.py +10 -4
- caselawclient/models/documents.py +105 -37
- caselawclient/models/judgments.py +1 -1
- caselawclient/models/press_summaries.py +1 -1
- caselawclient/models/utilities/__init__.py +0 -9
- caselawclient/models/utilities/aws.py +61 -7
- caselawclient/responses/search_response.py +4 -16
- caselawclient/responses/search_result.py +10 -22
- caselawclient/search_parameters.py +2 -0
- {ds_caselaw_marklogic_api_client-17.2.0.dist-info → ds_caselaw_marklogic_api_client-18.0.0.dist-info}/METADATA +2 -2
- {ds_caselaw_marklogic_api_client-17.2.0.dist-info → ds_caselaw_marklogic_api_client-18.0.0.dist-info}/RECORD +15 -15
- {ds_caselaw_marklogic_api_client-17.2.0.dist-info → ds_caselaw_marklogic_api_client-18.0.0.dist-info}/LICENSE.md +0 -0
- {ds_caselaw_marklogic_api_client-17.2.0.dist-info → ds_caselaw_marklogic_api_client-18.0.0.dist-info}/WHEEL +0 -0
caselawclient/Client.py
CHANGED
|
@@ -951,17 +951,3 @@ class MarklogicApiClient:
|
|
|
951
951
|
)
|
|
952
952
|
|
|
953
953
|
return results
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
api_client = MarklogicApiClient(
|
|
957
|
-
host=env("MARKLOGIC_HOST", default=None),
|
|
958
|
-
username=env("MARKLOGIC_USER", default=None),
|
|
959
|
-
password=env("MARKLOGIC_PASSWORD", default=None),
|
|
960
|
-
use_https=env("MARKLOGIC_USE_HTTPS", default=False),
|
|
961
|
-
)
|
|
962
|
-
"""
|
|
963
|
-
An instance of the API client which is automatically initialised on importing the library.
|
|
964
|
-
|
|
965
|
-
.. deprecated:: 13.0.1
|
|
966
|
-
You should instead initialise your own instance of `MarklogicApiClient`
|
|
967
|
-
"""
|
caselawclient/__init__.py
CHANGED
|
@@ -37,24 +37,4 @@ client = MarklogicApiClient(
|
|
|
37
37
|
|
|
38
38
|
```
|
|
39
39
|
|
|
40
|
-
## (Deprecated) Use in-library client instance
|
|
41
|
-
|
|
42
|
-
This library will automatically initialise an instance of the client. This functionality is deprecated, and will be
|
|
43
|
-
removed.
|
|
44
|
-
|
|
45
|
-
The client expects the following environment variables to be set or defined in a `.env` file:
|
|
46
|
-
|
|
47
|
-
```bash
|
|
48
|
-
MARKLOGIC_HOST
|
|
49
|
-
MARKLOGIC_USER
|
|
50
|
-
MARKLOGIC_PASSWORD
|
|
51
|
-
MARKLOGIC_USE_HTTPS # Optional, defaults to False
|
|
52
|
-
```
|
|
53
|
-
|
|
54
|
-
Then import `api_client` from `caselawclient.Client`:
|
|
55
|
-
|
|
56
|
-
```python
|
|
57
|
-
from caselawclient.Client import api_client
|
|
58
|
-
```
|
|
59
|
-
|
|
60
40
|
"""
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from lxml import etree
|
|
2
|
+
|
|
1
3
|
from caselawclient.Client import MarklogicApiClient
|
|
2
4
|
from caselawclient.responses.search_response import SearchResponse
|
|
3
5
|
from caselawclient.search_parameters import SearchParameters
|
|
@@ -14,8 +16,11 @@ def search_judgments_and_parse_response(
|
|
|
14
16
|
|
|
15
17
|
:return: The parsed search response as a SearchResponse object
|
|
16
18
|
"""
|
|
17
|
-
return SearchResponse
|
|
18
|
-
|
|
19
|
+
return SearchResponse(
|
|
20
|
+
etree.fromstring(
|
|
21
|
+
api_client.search_judgments_and_decode_response(search_parameters)
|
|
22
|
+
),
|
|
23
|
+
api_client,
|
|
19
24
|
)
|
|
20
25
|
|
|
21
26
|
|
|
@@ -30,6 +35,7 @@ def search_and_parse_response(
|
|
|
30
35
|
|
|
31
36
|
:return: The parsed search response as a SearchResponse object
|
|
32
37
|
"""
|
|
33
|
-
return SearchResponse
|
|
34
|
-
api_client.search_and_decode_response(search_parameters)
|
|
38
|
+
return SearchResponse(
|
|
39
|
+
etree.fromstring(api_client.search_and_decode_response(search_parameters)),
|
|
40
|
+
api_client,
|
|
35
41
|
)
|
|
@@ -18,13 +18,15 @@ from ..errors import (
|
|
|
18
18
|
OnlySupportedOnVersion,
|
|
19
19
|
)
|
|
20
20
|
from ..xml_helpers import get_xpath_match_string, get_xpath_match_strings
|
|
21
|
-
from .utilities import VersionsDict,
|
|
21
|
+
from .utilities import VersionsDict, render_versions
|
|
22
22
|
from .utilities.aws import (
|
|
23
|
+
ParserInstructionsDict,
|
|
24
|
+
announce_document_event,
|
|
23
25
|
delete_documents_from_private_bucket,
|
|
24
26
|
generate_docx_url,
|
|
25
27
|
generate_pdf_url,
|
|
26
|
-
notify_changed,
|
|
27
28
|
publish_documents,
|
|
29
|
+
request_parse,
|
|
28
30
|
unpublish_documents,
|
|
29
31
|
uri_for_s3,
|
|
30
32
|
)
|
|
@@ -74,6 +76,12 @@ class DocumentNotSafeForDeletion(Exception):
|
|
|
74
76
|
pass
|
|
75
77
|
|
|
76
78
|
|
|
79
|
+
class NonXMLDocumentError(Exception):
|
|
80
|
+
"""A document cannot be parsed as XML."""
|
|
81
|
+
|
|
82
|
+
pass
|
|
83
|
+
|
|
84
|
+
|
|
77
85
|
class Document:
|
|
78
86
|
"""
|
|
79
87
|
A base class from which all other document types are extensions. This class includes the essential methods for
|
|
@@ -140,6 +148,12 @@ class Document:
|
|
|
140
148
|
if not self.document_exists():
|
|
141
149
|
raise DocumentNotFoundError(f"Document {self.uri} does not exist")
|
|
142
150
|
|
|
151
|
+
self.xml = self.XML(
|
|
152
|
+
xml_bytestring=self.api_client.get_judgment_xml_bytestring(
|
|
153
|
+
self.uri, show_unpublished=True
|
|
154
|
+
)
|
|
155
|
+
)
|
|
156
|
+
|
|
143
157
|
def document_exists(self) -> bool:
|
|
144
158
|
"""Helper method to verify the existence of a document within MarkLogic.
|
|
145
159
|
|
|
@@ -165,14 +179,14 @@ class Document:
|
|
|
165
179
|
|
|
166
180
|
@cached_property
|
|
167
181
|
def name(self) -> str:
|
|
168
|
-
return self.
|
|
182
|
+
return self.xml.get_xpath_match_string(
|
|
169
183
|
"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value",
|
|
170
184
|
{"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
|
|
171
185
|
)
|
|
172
186
|
|
|
173
187
|
@cached_property
|
|
174
188
|
def court(self) -> str:
|
|
175
|
-
return self.
|
|
189
|
+
return self.xml.get_xpath_match_string(
|
|
176
190
|
"/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()",
|
|
177
191
|
{
|
|
178
192
|
"uk": "https://caselaw.nationalarchives.gov.uk/akn",
|
|
@@ -182,7 +196,7 @@ class Document:
|
|
|
182
196
|
|
|
183
197
|
@cached_property
|
|
184
198
|
def document_date_as_string(self) -> str:
|
|
185
|
-
return self.
|
|
199
|
+
return self.xml.get_xpath_match_string(
|
|
186
200
|
"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRdate/@date",
|
|
187
201
|
{"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
|
|
188
202
|
)
|
|
@@ -206,7 +220,7 @@ class Document:
|
|
|
206
220
|
self, name: Optional[str] = None
|
|
207
221
|
) -> list[datetime.datetime]:
|
|
208
222
|
name_filter = f"[@name='{name}']" if name else ""
|
|
209
|
-
iso_datetimes = self.
|
|
223
|
+
iso_datetimes = self.xml.get_xpath_match_strings(
|
|
210
224
|
"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRManifestation"
|
|
211
225
|
f"/akn:FRBRdate{name_filter}/@date",
|
|
212
226
|
{"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"},
|
|
@@ -248,6 +262,14 @@ class Document:
|
|
|
248
262
|
def is_held(self) -> bool:
|
|
249
263
|
return self.api_client.get_property(self.uri, "editor-hold") == "true"
|
|
250
264
|
|
|
265
|
+
@cached_property
|
|
266
|
+
def is_locked(self) -> bool:
|
|
267
|
+
return self.checkout_message is not None
|
|
268
|
+
|
|
269
|
+
@cached_property
|
|
270
|
+
def checkout_message(self) -> Optional[str]:
|
|
271
|
+
return self.api_client.get_judgment_checkout_status_message(self.uri)
|
|
272
|
+
|
|
251
273
|
@cached_property
|
|
252
274
|
def source_name(self) -> str:
|
|
253
275
|
return self.api_client.get_property(self.uri, "source-name")
|
|
@@ -322,17 +344,7 @@ class Document:
|
|
|
322
344
|
|
|
323
345
|
@cached_property
|
|
324
346
|
def content_as_xml(self) -> str:
|
|
325
|
-
return self.
|
|
326
|
-
|
|
327
|
-
@cached_property
|
|
328
|
-
def content_as_xml_bytestring(self) -> bytes:
|
|
329
|
-
return self.api_client.get_judgment_xml_bytestring(
|
|
330
|
-
self.uri, show_unpublished=True
|
|
331
|
-
)
|
|
332
|
-
|
|
333
|
-
@cached_property
|
|
334
|
-
def content_as_xml_tree(self) -> Any:
|
|
335
|
-
return etree.fromstring(self.content_as_xml_bytestring)
|
|
347
|
+
return self.xml.xml_as_string
|
|
336
348
|
|
|
337
349
|
def content_as_html(
|
|
338
350
|
self,
|
|
@@ -391,13 +403,10 @@ class Document:
|
|
|
391
403
|
|
|
392
404
|
:return: `True` if there was a complete parser failure, otherwise `False`
|
|
393
405
|
"""
|
|
394
|
-
if "error" in self.
|
|
406
|
+
if "error" in self.xml.root_element:
|
|
395
407
|
return True
|
|
396
408
|
return False
|
|
397
409
|
|
|
398
|
-
def _get_root(self) -> str:
|
|
399
|
-
return get_judgment_root(self.content_as_xml_bytestring)
|
|
400
|
-
|
|
401
410
|
@cached_property
|
|
402
411
|
def has_name(self) -> bool:
|
|
403
412
|
if not self.name:
|
|
@@ -448,9 +457,12 @@ class Document:
|
|
|
448
457
|
return DOCUMENT_STATUS_NEW
|
|
449
458
|
|
|
450
459
|
def enrich(self) -> None:
|
|
451
|
-
|
|
460
|
+
"""
|
|
461
|
+
Announces to the ANNOUNCE SNS that the document is waiting to be enriched.
|
|
462
|
+
"""
|
|
463
|
+
announce_document_event(
|
|
452
464
|
uri=self.uri,
|
|
453
|
-
status="
|
|
465
|
+
status="enrich",
|
|
454
466
|
enrich=True,
|
|
455
467
|
)
|
|
456
468
|
|
|
@@ -464,20 +476,19 @@ class Document:
|
|
|
464
476
|
|
|
465
477
|
publish_documents(uri_for_s3(self.uri))
|
|
466
478
|
self.api_client.set_published(self.uri, True)
|
|
467
|
-
|
|
479
|
+
announce_document_event(
|
|
468
480
|
uri=self.uri,
|
|
469
|
-
status="
|
|
470
|
-
enrich=True,
|
|
481
|
+
status="publish",
|
|
471
482
|
)
|
|
483
|
+
self.enrich()
|
|
472
484
|
|
|
473
485
|
def unpublish(self) -> None:
|
|
474
486
|
self.api_client.break_checkout(self.uri)
|
|
475
487
|
unpublish_documents(uri_for_s3(self.uri))
|
|
476
488
|
self.api_client.set_published(self.uri, False)
|
|
477
|
-
|
|
489
|
+
announce_document_event(
|
|
478
490
|
uri=self.uri,
|
|
479
|
-
status="
|
|
480
|
-
enrich=False,
|
|
491
|
+
status="unpublish",
|
|
481
492
|
)
|
|
482
493
|
|
|
483
494
|
def hold(self) -> None:
|
|
@@ -507,16 +518,73 @@ class Document:
|
|
|
507
518
|
else:
|
|
508
519
|
raise DocumentNotSafeForDeletion()
|
|
509
520
|
|
|
510
|
-
def _get_xpath_match_string(self, xpath: str, namespaces: Dict[str, str]) -> str:
|
|
511
|
-
return get_xpath_match_string(self.content_as_xml_tree, xpath, namespaces)
|
|
512
|
-
|
|
513
|
-
def _get_xpath_match_strings(
|
|
514
|
-
self, xpath: str, namespaces: Dict[str, str]
|
|
515
|
-
) -> list[str]:
|
|
516
|
-
return get_xpath_match_strings(self.content_as_xml_tree, xpath, namespaces)
|
|
517
|
-
|
|
518
521
|
def overwrite(self, new_citation: str) -> None:
|
|
519
522
|
self.api_client.overwrite_document(self.uri, new_citation)
|
|
520
523
|
|
|
521
524
|
def move(self, new_citation: str) -> None:
|
|
522
525
|
self.api_client.update_document_uri(self.uri, new_citation)
|
|
526
|
+
|
|
527
|
+
def reparse(self) -> None:
|
|
528
|
+
"Send an SNS notification that triggers reparsing, also sending all editor-modifiable metadata and URI"
|
|
529
|
+
|
|
530
|
+
parser_type_noun = {"judgment": "judgment", "press summary": "pressSummary"}[
|
|
531
|
+
self.document_noun
|
|
532
|
+
]
|
|
533
|
+
checked_date = (
|
|
534
|
+
self.document_date_as_string
|
|
535
|
+
if self.document_date_as_string > "1001"
|
|
536
|
+
else None
|
|
537
|
+
)
|
|
538
|
+
|
|
539
|
+
# the keys of parser_instructions should exactly match the parser output
|
|
540
|
+
# in the *-metadata.json files by the parser. Whilst typically empty
|
|
541
|
+
# values are "" from the API, we should pass None instead in this case.
|
|
542
|
+
|
|
543
|
+
parser_instructions: ParserInstructionsDict = {
|
|
544
|
+
"name": self.name or None,
|
|
545
|
+
"cite": self.best_human_identifier or None,
|
|
546
|
+
"court": self.court or None,
|
|
547
|
+
"date": checked_date,
|
|
548
|
+
"uri": self.uri,
|
|
549
|
+
"documentType": parser_type_noun,
|
|
550
|
+
"published": self.is_published,
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
request_parse(
|
|
554
|
+
uri=self.uri,
|
|
555
|
+
reference=self.consignment_reference,
|
|
556
|
+
parser_instructions=parser_instructions,
|
|
557
|
+
)
|
|
558
|
+
|
|
559
|
+
class XML:
|
|
560
|
+
"""
|
|
561
|
+
Represents the XML of a document, and should contain all methods for interacting with it.
|
|
562
|
+
"""
|
|
563
|
+
|
|
564
|
+
def __init__(self, xml_bytestring: bytes):
|
|
565
|
+
"""
|
|
566
|
+
:raises NonXMLDocumentError: This document is not valid XML
|
|
567
|
+
"""
|
|
568
|
+
try:
|
|
569
|
+
self.xml_as_tree: etree.Element = etree.fromstring(xml_bytestring)
|
|
570
|
+
except etree.XMLSyntaxError:
|
|
571
|
+
raise NonXMLDocumentError
|
|
572
|
+
|
|
573
|
+
@property
|
|
574
|
+
def xml_as_string(self) -> str:
|
|
575
|
+
"""
|
|
576
|
+
:return: A string representation of this document's XML tree.
|
|
577
|
+
"""
|
|
578
|
+
return str(etree.tostring(self.xml_as_tree).decode(encoding="utf-8"))
|
|
579
|
+
|
|
580
|
+
@property
|
|
581
|
+
def root_element(self) -> str:
|
|
582
|
+
return str(self.xml_as_tree.tag)
|
|
583
|
+
|
|
584
|
+
def get_xpath_match_string(self, xpath: str, namespaces: Dict[str, str]) -> str:
|
|
585
|
+
return get_xpath_match_string(self.xml_as_tree, xpath, namespaces)
|
|
586
|
+
|
|
587
|
+
def get_xpath_match_strings(
|
|
588
|
+
self, xpath: str, namespaces: Dict[str, str]
|
|
589
|
+
) -> list[str]:
|
|
590
|
+
return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
|
|
@@ -21,7 +21,7 @@ class Judgment(NeutralCitationMixin, Document):
|
|
|
21
21
|
@cached_property
|
|
22
22
|
def neutral_citation(self) -> str:
|
|
23
23
|
return get_xpath_match_string(
|
|
24
|
-
self.
|
|
24
|
+
self.xml.xml_as_tree,
|
|
25
25
|
"/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:cite/text()",
|
|
26
26
|
{
|
|
27
27
|
"uk": "https://caselaw.nationalarchives.gov.uk/akn",
|
|
@@ -21,7 +21,7 @@ class PressSummary(NeutralCitationMixin, Document):
|
|
|
21
21
|
@cached_property
|
|
22
22
|
def neutral_citation(self) -> str:
|
|
23
23
|
return get_xpath_match_string(
|
|
24
|
-
self.
|
|
24
|
+
self.xml.xml_as_tree,
|
|
25
25
|
"/akn:akomaNtoso/akn:doc/akn:preface/akn:p/akn:neutralCitation/text()",
|
|
26
26
|
{
|
|
27
27
|
"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import re
|
|
2
|
-
import xml.etree.ElementTree as ET
|
|
3
2
|
from typing import TypedDict
|
|
4
3
|
|
|
5
4
|
from requests_toolbelt.multipart.decoder import BodyPart
|
|
@@ -12,14 +11,6 @@ akn_namespace = {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"}
|
|
|
12
11
|
uk_namespace = {"uk": "https://caselaw.nationalarchives.gov.uk/akn"}
|
|
13
12
|
|
|
14
13
|
|
|
15
|
-
def get_judgment_root(judgment_xml: bytes) -> str:
|
|
16
|
-
try:
|
|
17
|
-
parsed_xml = ET.XML(judgment_xml)
|
|
18
|
-
return parsed_xml.tag
|
|
19
|
-
except ET.ParseError:
|
|
20
|
-
return "error"
|
|
21
|
-
|
|
22
|
-
|
|
23
14
|
class VersionsDict(TypedDict):
|
|
24
15
|
uri: str
|
|
25
16
|
version: int
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
+
import datetime
|
|
1
2
|
import json
|
|
2
3
|
import logging
|
|
3
|
-
|
|
4
|
+
import uuid
|
|
5
|
+
from typing import Any, Literal, Optional, TypedDict, Union, overload
|
|
4
6
|
|
|
5
7
|
import boto3
|
|
6
8
|
import botocore.client
|
|
@@ -9,10 +11,21 @@ from mypy_boto3_s3.client import S3Client
|
|
|
9
11
|
from mypy_boto3_s3.type_defs import CopySourceTypeDef, ObjectIdentifierTypeDef
|
|
10
12
|
from mypy_boto3_sns.client import SNSClient
|
|
11
13
|
from mypy_boto3_sns.type_defs import MessageAttributeValueTypeDef
|
|
14
|
+
from typing_extensions import NotRequired
|
|
12
15
|
|
|
13
16
|
env = environ.Env()
|
|
14
17
|
|
|
15
18
|
|
|
19
|
+
class ParserInstructionsDict(TypedDict):
|
|
20
|
+
name: NotRequired[Optional[str]]
|
|
21
|
+
cite: NotRequired[Optional[str]]
|
|
22
|
+
court: NotRequired[Optional[str]]
|
|
23
|
+
date: NotRequired[Optional[str]]
|
|
24
|
+
uri: NotRequired[Optional[str]]
|
|
25
|
+
documentType: NotRequired[Optional[str]]
|
|
26
|
+
published: NotRequired[bool]
|
|
27
|
+
|
|
28
|
+
|
|
16
29
|
@overload
|
|
17
30
|
def create_aws_client(service: Literal["s3"]) -> S3Client:
|
|
18
31
|
...
|
|
@@ -64,10 +77,14 @@ def generate_signed_asset_url(key: str) -> str:
|
|
|
64
77
|
)
|
|
65
78
|
|
|
66
79
|
|
|
67
|
-
def
|
|
68
|
-
|
|
80
|
+
def generate_docx_key(uri: str) -> str:
|
|
81
|
+
"""from a canonical caselaw URI (eat/2022/1) return the S3 key of the associated docx"""
|
|
82
|
+
return f'{uri}/{uri.replace("/", "_")}.docx'
|
|
69
83
|
|
|
70
|
-
|
|
84
|
+
|
|
85
|
+
def generate_docx_url(uri: str) -> str:
|
|
86
|
+
"""from a canonical caselaw URI (eat/2022/1) return a signed S3 link for the front end"""
|
|
87
|
+
return generate_signed_asset_url(generate_docx_key(uri))
|
|
71
88
|
|
|
72
89
|
|
|
73
90
|
def generate_pdf_url(uri: str) -> str:
|
|
@@ -110,7 +127,7 @@ def publish_documents(uri: str) -> None:
|
|
|
110
127
|
|
|
111
128
|
if not key.endswith("parser.log") and not key.endswith(".tar.gz"):
|
|
112
129
|
source: CopySourceTypeDef = {"Bucket": private_bucket, "Key": key}
|
|
113
|
-
extra_args = {
|
|
130
|
+
extra_args: dict[str, str] = {}
|
|
114
131
|
try:
|
|
115
132
|
client.copy(source, public_bucket, key, extra_args)
|
|
116
133
|
except botocore.client.ClientError as e:
|
|
@@ -127,7 +144,7 @@ def delete_documents_from_private_bucket(uri: str) -> None:
|
|
|
127
144
|
delete_from_bucket(uri, env("PRIVATE_ASSET_BUCKET"))
|
|
128
145
|
|
|
129
146
|
|
|
130
|
-
def
|
|
147
|
+
def announce_document_event(uri: str, status: str, enrich: bool = False) -> None:
|
|
131
148
|
client = create_sns_client()
|
|
132
149
|
|
|
133
150
|
message_attributes: dict[str, MessageAttributeValueTypeDef] = {}
|
|
@@ -146,7 +163,7 @@ def notify_changed(uri: str, status: str, enrich: bool = False) -> None:
|
|
|
146
163
|
}
|
|
147
164
|
|
|
148
165
|
client.publish(
|
|
149
|
-
TopicArn=env("SNS_TOPIC"),
|
|
166
|
+
TopicArn=env("SNS_TOPIC"), # this is the ANNOUNCE SNS topic
|
|
150
167
|
Message=json.dumps({"uri_reference": uri, "status": status}),
|
|
151
168
|
Subject=f"Updated: {uri} {status}",
|
|
152
169
|
MessageAttributes=message_attributes,
|
|
@@ -189,3 +206,40 @@ def build_new_key(old_key: str, new_uri: str) -> str:
|
|
|
189
206
|
return f"{new_uri}/{new_filename}.{old_filename.split('.')[-1]}"
|
|
190
207
|
else:
|
|
191
208
|
return f"{new_uri}/{old_filename}"
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def request_parse(
|
|
212
|
+
uri: str,
|
|
213
|
+
reference: Optional[str],
|
|
214
|
+
parser_instructions: Optional[ParserInstructionsDict] = None,
|
|
215
|
+
) -> None:
|
|
216
|
+
client = create_sns_client()
|
|
217
|
+
|
|
218
|
+
if parser_instructions is None:
|
|
219
|
+
parser_instructions = ParserInstructionsDict({})
|
|
220
|
+
|
|
221
|
+
message_to_send = {
|
|
222
|
+
"properties": {
|
|
223
|
+
"messageType": "uk.gov.nationalarchives.da.messages.request.courtdocument.parse.RequestCourtDocumentParse",
|
|
224
|
+
"timestamp": datetime.datetime.now(datetime.timezone.utc)
|
|
225
|
+
.isoformat()
|
|
226
|
+
.replace("+00:00", "Z"),
|
|
227
|
+
"function": "fcl-judgment-parse-request",
|
|
228
|
+
"producer": "FCL",
|
|
229
|
+
"executionId": f"fcl_ex_id_{uuid.uuid4()}",
|
|
230
|
+
"parentExecutionId": None,
|
|
231
|
+
},
|
|
232
|
+
"parameters": {
|
|
233
|
+
"s3Bucket": env("PRIVATE_ASSET_BUCKET"),
|
|
234
|
+
"s3Key": generate_docx_key(uri),
|
|
235
|
+
"reference": reference or f"FCL-{uuid.uuid4()}",
|
|
236
|
+
"originator": "FCL",
|
|
237
|
+
"parserInstructions": parser_instructions,
|
|
238
|
+
},
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
client.publish(
|
|
242
|
+
TopicArn=env("REPARSE_SNS_TOPIC"),
|
|
243
|
+
Message=json.dumps(message_to_send),
|
|
244
|
+
Subject=f"Reparse request: {uri}",
|
|
245
|
+
)
|
|
@@ -2,6 +2,7 @@ from typing import List
|
|
|
2
2
|
|
|
3
3
|
from lxml import etree
|
|
4
4
|
|
|
5
|
+
from caselawclient.Client import MarklogicApiClient
|
|
5
6
|
from caselawclient.responses.search_result import SearchResult
|
|
6
7
|
|
|
7
8
|
|
|
@@ -13,22 +14,14 @@ class SearchResponse:
|
|
|
13
14
|
NAMESPACES = {"search": "http://marklogic.com/appservices/search"}
|
|
14
15
|
""" Namespaces used in XPath expressions."""
|
|
15
16
|
|
|
16
|
-
def __init__(self, node: etree._Element) -> None:
|
|
17
|
+
def __init__(self, node: etree._Element, client: MarklogicApiClient) -> None:
|
|
17
18
|
"""
|
|
18
19
|
Initializes a SearchResponse instance from an xml node.
|
|
19
20
|
|
|
20
21
|
:param node: The XML data as an etree element
|
|
21
22
|
"""
|
|
22
23
|
self.node = node
|
|
23
|
-
|
|
24
|
-
@staticmethod
|
|
25
|
-
def from_response_string(xml: str) -> "SearchResponse":
|
|
26
|
-
"""
|
|
27
|
-
Constructs a SearchResponse instance from an xml response string.
|
|
28
|
-
|
|
29
|
-
:param xml: The XML data as a string
|
|
30
|
-
"""
|
|
31
|
-
return SearchResponse(etree.fromstring(xml))
|
|
24
|
+
self.client = client
|
|
32
25
|
|
|
33
26
|
@property
|
|
34
27
|
def total(self) -> str:
|
|
@@ -51,9 +44,4 @@ class SearchResponse:
|
|
|
51
44
|
results = self.node.xpath(
|
|
52
45
|
"//search:response/search:result", namespaces=self.NAMESPACES
|
|
53
46
|
)
|
|
54
|
-
return [
|
|
55
|
-
SearchResult(
|
|
56
|
-
result,
|
|
57
|
-
)
|
|
58
|
-
for result in results
|
|
59
|
-
]
|
|
47
|
+
return [SearchResult(result, self.client) for result in results]
|
|
@@ -2,6 +2,7 @@ import logging
|
|
|
2
2
|
import os
|
|
3
3
|
from datetime import datetime
|
|
4
4
|
from enum import Enum
|
|
5
|
+
from functools import cached_property
|
|
5
6
|
from typing import Dict, Optional
|
|
6
7
|
|
|
7
8
|
from dateutil import parser as dateparser
|
|
@@ -9,7 +10,7 @@ from dateutil.parser import ParserError
|
|
|
9
10
|
from ds_caselaw_utils.courts import Court, CourtNotFoundException, courts
|
|
10
11
|
from lxml import etree
|
|
11
12
|
|
|
12
|
-
from caselawclient.Client import
|
|
13
|
+
from caselawclient.Client import MarklogicApiClient
|
|
13
14
|
from caselawclient.models.documents import DocumentURIString
|
|
14
15
|
from caselawclient.xml_helpers import get_xpath_match_string
|
|
15
16
|
|
|
@@ -44,20 +45,6 @@ class SearchResultMetadata:
|
|
|
44
45
|
self.node = node
|
|
45
46
|
self.last_modified = last_modified
|
|
46
47
|
|
|
47
|
-
@staticmethod
|
|
48
|
-
def create_from_uri(uri: DocumentURIString) -> "SearchResultMetadata":
|
|
49
|
-
"""
|
|
50
|
-
Create a SearchResultMetadata instance from a search result URI.
|
|
51
|
-
|
|
52
|
-
:param uri: The URI of the search result
|
|
53
|
-
|
|
54
|
-
:return: The created SearchResultMetadata instance
|
|
55
|
-
"""
|
|
56
|
-
response_text = api_client.get_properties_for_search_results([uri])
|
|
57
|
-
last_modified = api_client.get_last_modified(uri)
|
|
58
|
-
root = etree.fromstring(response_text)
|
|
59
|
-
return SearchResultMetadata(root, last_modified)
|
|
60
|
-
|
|
61
48
|
@property
|
|
62
49
|
def author(self) -> str:
|
|
63
50
|
"""
|
|
@@ -162,12 +149,13 @@ class SearchResult:
|
|
|
162
149
|
}
|
|
163
150
|
""" Namespace mappings used in XPath expressions. """
|
|
164
151
|
|
|
165
|
-
def __init__(self, node: etree._Element):
|
|
152
|
+
def __init__(self, node: etree._Element, client: MarklogicApiClient):
|
|
166
153
|
"""
|
|
167
154
|
:param node: The XML element representing the search result
|
|
168
155
|
"""
|
|
169
156
|
|
|
170
157
|
self.node = node
|
|
158
|
+
self.client = client
|
|
171
159
|
|
|
172
160
|
@property
|
|
173
161
|
def uri(self) -> DocumentURIString:
|
|
@@ -259,15 +247,15 @@ class SearchResult:
|
|
|
259
247
|
xslt_transform = etree.XSLT(etree.parse(file_path))
|
|
260
248
|
return str(xslt_transform(self.node))
|
|
261
249
|
|
|
262
|
-
@
|
|
250
|
+
@cached_property
|
|
263
251
|
def metadata(self) -> SearchResultMetadata:
|
|
264
252
|
"""
|
|
265
|
-
:return:
|
|
253
|
+
:return: A `SearchResultMetadata` instance representing the metadata of this result
|
|
266
254
|
"""
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
)
|
|
255
|
+
response_text = self.client.get_properties_for_search_results([self.uri])
|
|
256
|
+
last_modified = self.client.get_last_modified(self.uri)
|
|
257
|
+
root = etree.fromstring(response_text)
|
|
258
|
+
return SearchResultMetadata(root, last_modified)
|
|
271
259
|
|
|
272
260
|
def _get_xpath_match_string(self, path: str) -> str:
|
|
273
261
|
return get_xpath_match_string(self.node, path, namespaces=self.NAMESPACES)
|
|
@@ -73,6 +73,8 @@ class SearchParameters:
|
|
|
73
73
|
"ewhc/kb": "ewhc/qb",
|
|
74
74
|
"ewhc/scco": "ewhc/costs",
|
|
75
75
|
"ewhc/costs": "ewhc/scco",
|
|
76
|
+
"ukait": "ukut/iac",
|
|
77
|
+
"ukut/iac": "ukait",
|
|
76
78
|
}
|
|
77
79
|
alternative_court_names = set()
|
|
78
80
|
for primary_name, secondary_name in ALTERNATIVE_COURT_NAMES_MAP.items():
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ds-caselaw-marklogic-api-client
|
|
3
|
-
Version:
|
|
3
|
+
Version: 18.0.0
|
|
4
4
|
Summary: An API client for interacting with the underlying data in Find Caselaw.
|
|
5
5
|
Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
|
|
6
6
|
Keywords: national archives,caselaw
|
|
@@ -14,7 +14,7 @@ Requires-Dist: boto3 (>=1.26.112,<2.0.0)
|
|
|
14
14
|
Requires-Dist: certifi (>=2022.12.7,<2024.0.0)
|
|
15
15
|
Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
|
|
16
16
|
Requires-Dist: django-environ (>=0.11.0,<0.12.0)
|
|
17
|
-
Requires-Dist: ds-caselaw-utils (>=1.
|
|
17
|
+
Requires-Dist: ds-caselaw-utils (>=1.3.3,<2.0.0)
|
|
18
18
|
Requires-Dist: idna (>=3.4,<4.0)
|
|
19
19
|
Requires-Dist: lxml (>=4.9.2,<5.0.0)
|
|
20
20
|
Requires-Dist: memoization (>=0.4.0,<0.5.0)
|
|
@@ -1,23 +1,23 @@
|
|
|
1
|
-
caselawclient/Client.py,sha256=
|
|
2
|
-
caselawclient/__init__.py,sha256=
|
|
1
|
+
caselawclient/Client.py,sha256=c8DvbUAhBGpraD5oj1sVOtkco-j6Vq1zG5zshAOiuHI,34589
|
|
2
|
+
caselawclient/__init__.py,sha256=DY-caubLDQWWingSdsBWgovDNXh8KcnkI6kwz08eIFk,612
|
|
3
3
|
caselawclient/client_helpers/__init__.py,sha256=6vUjIwi777iaNDBUYwWmpzgAXeFHeXnmmMBniVmjUP8,3830
|
|
4
|
-
caselawclient/client_helpers/search_helpers.py,sha256=
|
|
4
|
+
caselawclient/client_helpers/search_helpers.py,sha256=DYgUltPq8fFI2KkLRqH1-8zpbb8_swBFyBvvgBbinig,1514
|
|
5
5
|
caselawclient/content_hash.py,sha256=DF7ujrQPNf1bTSbK0mIIaC5qx6CmF5I0xlQ7uIG0zYI,2236
|
|
6
6
|
caselawclient/errors.py,sha256=3rsbOQ11hIhm7-UABcHNMcs9XgcrIzytAP0koyZBLWM,3195
|
|
7
7
|
caselawclient/models/__init__.py,sha256=kd23EUpvaC7aLHdgk8farqKAQEx3lf7RvNT2jEatvlg,68
|
|
8
|
-
caselawclient/models/documents.py,sha256=
|
|
9
|
-
caselawclient/models/judgments.py,sha256=
|
|
8
|
+
caselawclient/models/documents.py,sha256=qiIb2sKBL_sbrAOwRI62MWM6V0GRXndzxWbfx4XbQbA,19447
|
|
9
|
+
caselawclient/models/judgments.py,sha256=TcAsn27K--QQAfaaUZ8biybB9OeVS__91FRlwaG16HY,1020
|
|
10
10
|
caselawclient/models/neutral_citation_mixin.py,sha256=qqB1K4IHVy0XvdY40sfVywZ6VGaZ9ojHcVOQRyi0Vhc,1752
|
|
11
|
-
caselawclient/models/press_summaries.py,sha256=
|
|
12
|
-
caselawclient/models/utilities/__init__.py,sha256=
|
|
13
|
-
caselawclient/models/utilities/aws.py,sha256=
|
|
11
|
+
caselawclient/models/press_summaries.py,sha256=5c1jpVhVtmIMN8AeHMywGXvz4H55kKAIUaaaVims6Tw,994
|
|
12
|
+
caselawclient/models/utilities/__init__.py,sha256=aL1a2nDacPxninETeaVZKwOxZemgvm73IcpWgMNXoGc,1100
|
|
13
|
+
caselawclient/models/utilities/aws.py,sha256=HxmcoDXPpkDfxDH-tm3HtxnMCfOC2qIuy6PZs46OveY,7645
|
|
14
14
|
caselawclient/models/utilities/move.py,sha256=_SKzO1UVXHFIVvWfT4nuCwdov7acp8tYzzEg-vVfUyg,5372
|
|
15
15
|
caselawclient/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
16
|
caselawclient/responses/__init__.py,sha256=2-5NJn_PXPTje_W4dHeHYaNRN6vXK4UcB9eLLNUAKa4,67
|
|
17
|
-
caselawclient/responses/search_response.py,sha256=
|
|
18
|
-
caselawclient/responses/search_result.py,sha256=
|
|
17
|
+
caselawclient/responses/search_response.py,sha256=OlzXOEnCg-4HdfOYfeIMdk-UfROOI_Nz-etfznFffok,1335
|
|
18
|
+
caselawclient/responses/search_result.py,sha256=M6BSzkGRn0TY2Qs17bplv1RbhBHECAypUjnXnoslltc,7198
|
|
19
19
|
caselawclient/responses/xsl/search_match.xsl,sha256=g_CYe8bOQ-Hnzt1YaobbdEP5B71XtEJhJG8jrfc9rG0,918
|
|
20
|
-
caselawclient/search_parameters.py,sha256=
|
|
20
|
+
caselawclient/search_parameters.py,sha256=cT2pJueQNNNbbzcwpEoTz_9yHVv9ZD6GsfycXKDtgCQ,3075
|
|
21
21
|
caselawclient/xml_helpers.py,sha256=Ihy6OxdEHm7LFePX0XF6uzpyp6yo7U13zyP0nhpBguU,582
|
|
22
22
|
caselawclient/xml_tools.py,sha256=AXYMml8TZfRu3_CGG9nqsPUz_e4RRu4ZbBeFxKuTSFY,3830
|
|
23
23
|
caselawclient/xquery/break_judgment_checkout.xqy,sha256=rISzoBKxQKrP5ZRdCSoRqOXW8T_NDBSZRFjOXo_H3ns,220
|
|
@@ -53,7 +53,7 @@ caselawclient/xquery/validate_all_documents.xqy,sha256=z_0YEXmRcZ-FaJM0ouKiTjdI4
|
|
|
53
53
|
caselawclient/xquery/xslt.xqy,sha256=w57wNijH3dkwHkpKeAxqjlghVflQwo8cq6jS_sm-erM,199
|
|
54
54
|
caselawclient/xquery/xslt_transform.xqy,sha256=smyFFxqmtkuOzBd2l7uw6K2oAsYctudrP8omdv_XNAM,2463
|
|
55
55
|
caselawclient/xquery_type_dicts.py,sha256=MZwjEURV_s-USIeX_qr_5VXcfswwoBgFQdw1ITVmtwQ,4726
|
|
56
|
-
ds_caselaw_marklogic_api_client-
|
|
57
|
-
ds_caselaw_marklogic_api_client-
|
|
58
|
-
ds_caselaw_marklogic_api_client-
|
|
59
|
-
ds_caselaw_marklogic_api_client-
|
|
56
|
+
ds_caselaw_marklogic_api_client-18.0.0.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
|
|
57
|
+
ds_caselaw_marklogic_api_client-18.0.0.dist-info/METADATA,sha256=mXQ4jemxpauxmDZMzRDXYgN0_Qc-dyWfxWCZl7bVY-A,4006
|
|
58
|
+
ds_caselaw_marklogic_api_client-18.0.0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
|
59
|
+
ds_caselaw_marklogic_api_client-18.0.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|