ds-caselaw-marklogic-api-client 17.1.0__tar.gz → 17.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ds-caselaw-marklogic-api-client might be problematic. Click here for more details.
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/PKG-INFO +2 -2
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/pyproject.toml +5 -3
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/Client.py +5 -1
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/models/documents.py +81 -15
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/models/utilities/aws.py +61 -7
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/search_parameters.py +2 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/xslt_transform.xqy +37 -1
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery_type_dicts.py +1 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/LICENSE.md +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/README.md +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/client_helpers/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/client_helpers/search_helpers.py +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/content_hash.py +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/errors.py +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/models/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/models/judgments.py +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/models/neutral_citation_mixin.py +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/models/press_summaries.py +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/models/utilities/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/models/utilities/move.py +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/py.typed +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/responses/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/responses/search_response.py +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/responses/search_result.py +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/responses/xsl/search_match.xsl +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xml_helpers.py +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xml_tools.py +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/break_judgment_checkout.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/checkin_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/checkout_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/copy_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/delete_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/document_collections.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/document_exists.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/get_combined_stats_table.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/get_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/get_judgment_checkout_status.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/get_judgment_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/get_last_modified.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/get_properties_for_search_results.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/get_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/get_version_annotation.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/get_version_created.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/insert_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/list_judgment_versions.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/set_boolean_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/set_metadata_citation.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/set_metadata_court.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/set_metadata_name.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/set_metadata_this_uri.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/set_metadata_work_expression_date.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/set_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/update_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/update_locked_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/user_has_privilege.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/user_has_role.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/validate_all_documents.xqy +0 -0
- {ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/src/caselawclient/xquery/xslt.xqy +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ds-caselaw-marklogic-api-client
|
|
3
|
-
Version: 17.1.0
|
|
3
|
+
Version: 17.3.0
|
|
4
4
|
Summary: An API client for interacting with the underlying data in Find Caselaw.
|
|
5
5
|
Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
|
|
6
6
|
Keywords: national archives,caselaw
|
|
@@ -14,7 +14,7 @@ Requires-Dist: boto3 (>=1.26.112,<2.0.0)
|
|
|
14
14
|
Requires-Dist: certifi (>=2022.12.7,<2024.0.0)
|
|
15
15
|
Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
|
|
16
16
|
Requires-Dist: django-environ (>=0.11.0,<0.12.0)
|
|
17
|
-
Requires-Dist: ds-caselaw-utils (>=1.
|
|
17
|
+
Requires-Dist: ds-caselaw-utils (>=1.3.0,<2.0.0)
|
|
18
18
|
Requires-Dist: idna (>=3.4,<4.0)
|
|
19
19
|
Requires-Dist: lxml (>=4.9.2,<5.0.0)
|
|
20
20
|
Requires-Dist: memoization (>=0.4.0,<0.5.0)
|
{ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/pyproject.toml
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "ds-caselaw-marklogic-api-client"
|
|
3
|
-
version = "17.1.0"
|
|
3
|
+
version = "17.3.0"
|
|
4
4
|
description = "An API client for interacting with the underlying data in Find Caselaw."
|
|
5
5
|
authors = ["The National Archives"]
|
|
6
6
|
homepage = "https://github.com/nationalarchives/ds-caselaw-custom-api-client"
|
|
@@ -21,7 +21,7 @@ requests-toolbelt = ">=0.10.1,<1.1.0"
|
|
|
21
21
|
urllib3 = "^1.26.15"
|
|
22
22
|
memoization = "^0.4.0"
|
|
23
23
|
lxml = "^4.9.2"
|
|
24
|
-
ds-caselaw-utils = "^1.
|
|
24
|
+
ds-caselaw-utils = "^1.3.0"
|
|
25
25
|
boto3 = "^1.26.112"
|
|
26
26
|
typing-extensions = "^4.7.1"
|
|
27
27
|
mypy-boto3-s3 = "^1.26.104"
|
|
@@ -30,7 +30,7 @@ mypy-boto3-sns = "^1.26.69"
|
|
|
30
30
|
[tool.poetry.group.dev.dependencies]
|
|
31
31
|
coverage = "^7.2.3"
|
|
32
32
|
pytest = "^7.3.2"
|
|
33
|
-
responses = "^0.
|
|
33
|
+
responses = "^0.24.0"
|
|
34
34
|
python-dotenv = "^1.0.0"
|
|
35
35
|
|
|
36
36
|
[tool.poetry.group.docs]
|
|
@@ -47,3 +47,5 @@ build-backend = "poetry.core.masonry.api"
|
|
|
47
47
|
markers = [
|
|
48
48
|
"write: the test deliberately changes the Marklogic DB')",
|
|
49
49
|
]
|
|
50
|
+
|
|
51
|
+
filterwarnings = ["ignore::DeprecationWarning"]
|
|
@@ -205,7 +205,9 @@ class MarklogicApiClient:
|
|
|
205
205
|
self.session.headers.update({"User-Agent": user_agent})
|
|
206
206
|
self.user_agent = user_agent
|
|
207
207
|
|
|
208
|
-
def get_document_by_uri(
|
|
208
|
+
def get_document_by_uri(
|
|
209
|
+
self, uri: DocumentURIString, query: Optional[str] = None
|
|
210
|
+
) -> Document:
|
|
209
211
|
document_type_class = self.get_document_type_from_uri(uri)
|
|
210
212
|
return document_type_class(uri, self)
|
|
211
213
|
|
|
@@ -684,6 +686,7 @@ class MarklogicApiClient:
|
|
|
684
686
|
version_uri: Optional[DocumentURIString] = None,
|
|
685
687
|
show_unpublished: bool = False,
|
|
686
688
|
xsl_filename: str = DEFAULT_XSL_TRANSFORM,
|
|
689
|
+
query: Optional[str] = None,
|
|
687
690
|
) -> requests.Response:
|
|
688
691
|
marklogic_document_uri = self._format_uri_for_marklogic(judgment_uri)
|
|
689
692
|
marklogic_document_version_uri = (
|
|
@@ -707,6 +710,7 @@ class MarklogicApiClient:
|
|
|
707
710
|
"show_unpublished": show_unpublished,
|
|
708
711
|
"img_location": image_location,
|
|
709
712
|
"xsl_filename": xsl_filename,
|
|
713
|
+
"query": query,
|
|
710
714
|
}
|
|
711
715
|
|
|
712
716
|
return self._send_to_eval(vars, "xslt_transform.xqy")
|
|
@@ -6,23 +6,27 @@ from typing import TYPE_CHECKING, Any, Dict, NewType, Optional
|
|
|
6
6
|
from ds_caselaw_utils import courts
|
|
7
7
|
from ds_caselaw_utils.courts import CourtNotFoundException
|
|
8
8
|
from lxml import etree
|
|
9
|
+
from lxml import html as html_parser
|
|
9
10
|
from requests_toolbelt.multipart import decoder
|
|
10
11
|
|
|
11
12
|
from caselawclient.models.utilities import extract_version
|
|
12
13
|
|
|
13
14
|
from ..errors import (
|
|
14
15
|
DocumentNotFoundError,
|
|
16
|
+
GatewayTimeoutError,
|
|
15
17
|
NotSupportedOnVersion,
|
|
16
18
|
OnlySupportedOnVersion,
|
|
17
19
|
)
|
|
18
20
|
from ..xml_helpers import get_xpath_match_string, get_xpath_match_strings
|
|
19
21
|
from .utilities import VersionsDict, get_judgment_root, render_versions
|
|
20
22
|
from .utilities.aws import (
|
|
23
|
+
ParserInstructionsDict,
|
|
24
|
+
announce_document_event,
|
|
21
25
|
delete_documents_from_private_bucket,
|
|
22
26
|
generate_docx_url,
|
|
23
27
|
generate_pdf_url,
|
|
24
|
-
notify_changed,
|
|
25
28
|
publish_documents,
|
|
29
|
+
request_parse,
|
|
26
30
|
unpublish_documents,
|
|
27
31
|
uri_for_s3,
|
|
28
32
|
)
|
|
@@ -32,6 +36,10 @@ class UnparsableDate(Warning):
|
|
|
32
36
|
pass
|
|
33
37
|
|
|
34
38
|
|
|
39
|
+
class GatewayTimeoutGettingHTMLWithQuery(RuntimeWarning):
|
|
40
|
+
pass
|
|
41
|
+
|
|
42
|
+
|
|
35
43
|
DOCUMENT_STATUS_HOLD = "On hold"
|
|
36
44
|
""" This document has been placed on hold to actively prevent publication. """
|
|
37
45
|
|
|
@@ -328,12 +336,36 @@ class Document:
|
|
|
328
336
|
def content_as_xml_tree(self) -> Any:
|
|
329
337
|
return etree.fromstring(self.content_as_xml_bytestring)
|
|
330
338
|
|
|
331
|
-
def content_as_html(
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
339
|
+
def content_as_html(
|
|
340
|
+
self,
|
|
341
|
+
version_uri: Optional[DocumentURIString] = None,
|
|
342
|
+
query: Optional[str] = None,
|
|
343
|
+
) -> str:
|
|
344
|
+
try:
|
|
345
|
+
results = self.api_client.eval_xslt(
|
|
346
|
+
self.uri, version_uri, show_unpublished=True, query=query
|
|
347
|
+
)
|
|
348
|
+
multipart_data = decoder.MultipartDecoder.from_response(results)
|
|
349
|
+
return str(multipart_data.parts[0].text)
|
|
350
|
+
except GatewayTimeoutError as e:
|
|
351
|
+
if query is not None:
|
|
352
|
+
warnings.warn(
|
|
353
|
+
(
|
|
354
|
+
"Gateway timeout when getting content with query"
|
|
355
|
+
"highlighting for document %s, version %s, and query"
|
|
356
|
+
'"%s", falling back to unhighlighted content...'
|
|
357
|
+
)
|
|
358
|
+
% (self.uri, version_uri, query),
|
|
359
|
+
GatewayTimeoutGettingHTMLWithQuery,
|
|
360
|
+
)
|
|
361
|
+
return self.content_as_html(version_uri)
|
|
362
|
+
else:
|
|
363
|
+
raise e
|
|
364
|
+
|
|
365
|
+
def number_of_mentions(self, query: str) -> int:
|
|
366
|
+
html = self.content_as_html(query=query)
|
|
367
|
+
tree = html_parser.fromstring(html.encode("utf-8"))
|
|
368
|
+
return len(tree.findall(".//mark"))
|
|
337
369
|
|
|
338
370
|
@cached_property
|
|
339
371
|
def is_failure(self) -> bool:
|
|
@@ -418,9 +450,12 @@ class Document:
|
|
|
418
450
|
return DOCUMENT_STATUS_NEW
|
|
419
451
|
|
|
420
452
|
def enrich(self) -> None:
|
|
421
|
-
|
|
453
|
+
"""
|
|
454
|
+
Announces to the ANNOUNCE SNS that the document is waiting to be enriched.
|
|
455
|
+
"""
|
|
456
|
+
announce_document_event(
|
|
422
457
|
uri=self.uri,
|
|
423
|
-
status="
|
|
458
|
+
status="enrich",
|
|
424
459
|
enrich=True,
|
|
425
460
|
)
|
|
426
461
|
|
|
@@ -434,20 +469,19 @@ class Document:
|
|
|
434
469
|
|
|
435
470
|
publish_documents(uri_for_s3(self.uri))
|
|
436
471
|
self.api_client.set_published(self.uri, True)
|
|
437
|
-
|
|
472
|
+
announce_document_event(
|
|
438
473
|
uri=self.uri,
|
|
439
|
-
status="
|
|
440
|
-
enrich=True,
|
|
474
|
+
status="publish",
|
|
441
475
|
)
|
|
476
|
+
self.enrich()
|
|
442
477
|
|
|
443
478
|
def unpublish(self) -> None:
|
|
444
479
|
self.api_client.break_checkout(self.uri)
|
|
445
480
|
unpublish_documents(uri_for_s3(self.uri))
|
|
446
481
|
self.api_client.set_published(self.uri, False)
|
|
447
|
-
|
|
482
|
+
announce_document_event(
|
|
448
483
|
uri=self.uri,
|
|
449
|
-
status="
|
|
450
|
-
enrich=False,
|
|
484
|
+
status="unpublish",
|
|
451
485
|
)
|
|
452
486
|
|
|
453
487
|
def hold(self) -> None:
|
|
@@ -490,3 +524,35 @@ class Document:
|
|
|
490
524
|
|
|
491
525
|
def move(self, new_citation: str) -> None:
|
|
492
526
|
self.api_client.update_document_uri(self.uri, new_citation)
|
|
527
|
+
|
|
528
|
+
def reparse(self) -> None:
|
|
529
|
+
"Send an SNS notification that triggers reparsing, also sending all editor-modifiable metadata and URI"
|
|
530
|
+
|
|
531
|
+
parser_type_noun = {"judgment": "judgment", "press summary": "pressSummary"}[
|
|
532
|
+
self.document_noun
|
|
533
|
+
]
|
|
534
|
+
checked_date = (
|
|
535
|
+
self.document_date_as_string
|
|
536
|
+
if self.document_date_as_string > "1001"
|
|
537
|
+
else None
|
|
538
|
+
)
|
|
539
|
+
|
|
540
|
+
# the keys of parser_instructions should exactly match the parser output
|
|
541
|
+
# in the *-metadata.json files by the parser. Whilst typically empty
|
|
542
|
+
# values are "" from the API, we should pass None instead in this case.
|
|
543
|
+
|
|
544
|
+
parser_instructions: ParserInstructionsDict = {
|
|
545
|
+
"name": self.name or None,
|
|
546
|
+
"cite": self.best_human_identifier or None,
|
|
547
|
+
"court": self.court or None,
|
|
548
|
+
"date": checked_date,
|
|
549
|
+
"uri": self.uri,
|
|
550
|
+
"documentType": parser_type_noun,
|
|
551
|
+
"published": self.is_published,
|
|
552
|
+
}
|
|
553
|
+
|
|
554
|
+
request_parse(
|
|
555
|
+
uri=self.uri,
|
|
556
|
+
reference=self.consignment_reference,
|
|
557
|
+
parser_instructions=parser_instructions,
|
|
558
|
+
)
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
+
import datetime
|
|
1
2
|
import json
|
|
2
3
|
import logging
|
|
3
|
-
|
|
4
|
+
import uuid
|
|
5
|
+
from typing import Any, Literal, Optional, TypedDict, Union, overload
|
|
4
6
|
|
|
5
7
|
import boto3
|
|
6
8
|
import botocore.client
|
|
@@ -9,10 +11,21 @@ from mypy_boto3_s3.client import S3Client
|
|
|
9
11
|
from mypy_boto3_s3.type_defs import CopySourceTypeDef, ObjectIdentifierTypeDef
|
|
10
12
|
from mypy_boto3_sns.client import SNSClient
|
|
11
13
|
from mypy_boto3_sns.type_defs import MessageAttributeValueTypeDef
|
|
14
|
+
from typing_extensions import NotRequired
|
|
12
15
|
|
|
13
16
|
env = environ.Env()
|
|
14
17
|
|
|
15
18
|
|
|
19
|
+
class ParserInstructionsDict(TypedDict):
|
|
20
|
+
name: NotRequired[Optional[str]]
|
|
21
|
+
cite: NotRequired[Optional[str]]
|
|
22
|
+
court: NotRequired[Optional[str]]
|
|
23
|
+
date: NotRequired[Optional[str]]
|
|
24
|
+
uri: NotRequired[Optional[str]]
|
|
25
|
+
documentType: NotRequired[Optional[str]]
|
|
26
|
+
published: NotRequired[bool]
|
|
27
|
+
|
|
28
|
+
|
|
16
29
|
@overload
|
|
17
30
|
def create_aws_client(service: Literal["s3"]) -> S3Client:
|
|
18
31
|
...
|
|
@@ -64,10 +77,14 @@ def generate_signed_asset_url(key: str) -> str:
|
|
|
64
77
|
)
|
|
65
78
|
|
|
66
79
|
|
|
67
|
-
def
|
|
68
|
-
|
|
80
|
+
def generate_docx_key(uri: str) -> str:
|
|
81
|
+
"""from a canonical caselaw URI (eat/2022/1) return the S3 key of the associated docx"""
|
|
82
|
+
return f'{uri}/{uri.replace("/", "_")}.docx'
|
|
69
83
|
|
|
70
|
-
|
|
84
|
+
|
|
85
|
+
def generate_docx_url(uri: str) -> str:
|
|
86
|
+
"""from a canonical caselaw URI (eat/2022/1) return a signed S3 link for the front end"""
|
|
87
|
+
return generate_signed_asset_url(generate_docx_key(uri))
|
|
71
88
|
|
|
72
89
|
|
|
73
90
|
def generate_pdf_url(uri: str) -> str:
|
|
@@ -110,7 +127,7 @@ def publish_documents(uri: str) -> None:
|
|
|
110
127
|
|
|
111
128
|
if not key.endswith("parser.log") and not key.endswith(".tar.gz"):
|
|
112
129
|
source: CopySourceTypeDef = {"Bucket": private_bucket, "Key": key}
|
|
113
|
-
extra_args = {
|
|
130
|
+
extra_args: dict[str, str] = {}
|
|
114
131
|
try:
|
|
115
132
|
client.copy(source, public_bucket, key, extra_args)
|
|
116
133
|
except botocore.client.ClientError as e:
|
|
@@ -127,7 +144,7 @@ def delete_documents_from_private_bucket(uri: str) -> None:
|
|
|
127
144
|
delete_from_bucket(uri, env("PRIVATE_ASSET_BUCKET"))
|
|
128
145
|
|
|
129
146
|
|
|
130
|
-
def notify_changed(uri: str, status: str, enrich: bool = False) -> None:
|
|
147
|
+
def announce_document_event(uri: str, status: str, enrich: bool = False) -> None:
|
|
131
148
|
client = create_sns_client()
|
|
132
149
|
|
|
133
150
|
message_attributes: dict[str, MessageAttributeValueTypeDef] = {}
|
|
@@ -146,7 +163,7 @@ def notify_changed(uri: str, status: str, enrich: bool = False) -> None:
|
|
|
146
163
|
}
|
|
147
164
|
|
|
148
165
|
client.publish(
|
|
149
|
-
TopicArn=env("SNS_TOPIC"),
|
|
166
|
+
TopicArn=env("SNS_TOPIC"), # this is the ANNOUNCE SNS topic
|
|
150
167
|
Message=json.dumps({"uri_reference": uri, "status": status}),
|
|
151
168
|
Subject=f"Updated: {uri} {status}",
|
|
152
169
|
MessageAttributes=message_attributes,
|
|
@@ -189,3 +206,40 @@ def build_new_key(old_key: str, new_uri: str) -> str:
|
|
|
189
206
|
return f"{new_uri}/{new_filename}.{old_filename.split('.')[-1]}"
|
|
190
207
|
else:
|
|
191
208
|
return f"{new_uri}/{old_filename}"
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def request_parse(
|
|
212
|
+
uri: str,
|
|
213
|
+
reference: Optional[str],
|
|
214
|
+
parser_instructions: Optional[ParserInstructionsDict] = None,
|
|
215
|
+
) -> None:
|
|
216
|
+
client = create_sns_client()
|
|
217
|
+
|
|
218
|
+
if parser_instructions is None:
|
|
219
|
+
parser_instructions = ParserInstructionsDict({})
|
|
220
|
+
|
|
221
|
+
message_to_send = {
|
|
222
|
+
"properties": {
|
|
223
|
+
"messageType": "uk.gov.nationalarchives.da.messages.request.courtdocument.parse.RequestCourtDocumentParse",
|
|
224
|
+
"timestamp": datetime.datetime.now(datetime.timezone.utc)
|
|
225
|
+
.isoformat()
|
|
226
|
+
.replace("+00:00", "Z"),
|
|
227
|
+
"function": "fcl-judgment-parse-request",
|
|
228
|
+
"producer": "FCL",
|
|
229
|
+
"executionId": f"fcl_ex_id_{uuid.uuid4()}",
|
|
230
|
+
"parentExecutionId": None,
|
|
231
|
+
},
|
|
232
|
+
"parameters": {
|
|
233
|
+
"s3Bucket": env("PRIVATE_ASSET_BUCKET"),
|
|
234
|
+
"s3Key": generate_docx_key(uri),
|
|
235
|
+
"reference": reference or f"FCL-{uuid.uuid4()}",
|
|
236
|
+
"originator": "FCL",
|
|
237
|
+
"parserInstructions": parser_instructions,
|
|
238
|
+
},
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
client.publish(
|
|
242
|
+
TopicArn=env("REPARSE_SNS_TOPIC"),
|
|
243
|
+
Message=json.dumps(message_to_send),
|
|
244
|
+
Subject=f"Reparse request: {uri}",
|
|
245
|
+
)
|
|
@@ -73,6 +73,8 @@ class SearchParameters:
|
|
|
73
73
|
"ewhc/kb": "ewhc/qb",
|
|
74
74
|
"ewhc/scco": "ewhc/costs",
|
|
75
75
|
"ewhc/costs": "ewhc/scco",
|
|
76
|
+
"ukait": "ukut/iac",
|
|
77
|
+
"ukut/iac": "ukait",
|
|
76
78
|
}
|
|
77
79
|
alternative_court_names = set()
|
|
78
80
|
for primary_name, secondary_name in ALTERNATIVE_COURT_NAMES_MAP.items():
|
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
xquery version "1.0-ml";
|
|
2
2
|
|
|
3
|
+
import module namespace helper = "https://caselaw.nationalarchives.gov.uk/helper" at "/judgments/search/helper.xqy";
|
|
4
|
+
|
|
3
5
|
declare variable $show_unpublished as xs:boolean? external;
|
|
4
6
|
declare variable $uri as xs:string external;
|
|
5
7
|
declare variable $version_uri as xs:string? external;
|
|
6
8
|
declare variable $img_location as xs:string? external;
|
|
7
9
|
declare variable $xsl_filename as xs:string? external;
|
|
10
|
+
declare variable $query as xs:string? external;
|
|
8
11
|
|
|
9
12
|
let $judgment_published_property := xdmp:document-get-properties($uri, xs:QName("published"))[1]
|
|
10
13
|
let $is_published := $judgment_published_property/text()
|
|
@@ -14,6 +17,27 @@ let $xsl_path := fn:concat("judgments/xslts/", $xsl_filename)
|
|
|
14
17
|
|
|
15
18
|
let $params := map:map()
|
|
16
19
|
|
|
20
|
+
let $number_marks_xslt := (
|
|
21
|
+
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
|
22
|
+
version="2.0">
|
|
23
|
+
<xsl:output method="html" />
|
|
24
|
+
<xsl:template match="@*|node()">
|
|
25
|
+
<xsl:copy>
|
|
26
|
+
<xsl:apply-templates select="@*|node()"/>
|
|
27
|
+
</xsl:copy>
|
|
28
|
+
</xsl:template>
|
|
29
|
+
<xsl:template match="mark">
|
|
30
|
+
<xsl:copy>
|
|
31
|
+
<xsl:copy-of select="@*" />
|
|
32
|
+
<xsl:attribute name="id">
|
|
33
|
+
<xsl:text>mark_</xsl:text>
|
|
34
|
+
<xsl:value-of select="count(preceding::mark)"/>
|
|
35
|
+
</xsl:attribute>
|
|
36
|
+
<xsl:apply-templates />
|
|
37
|
+
</xsl:copy>
|
|
38
|
+
</xsl:template>
|
|
39
|
+
</xsl:stylesheet>
|
|
40
|
+
)
|
|
17
41
|
(: change the image-base of the document to match the location of the assets in $image_base
|
|
18
42
|
so that references to images point to the correct places on the internet :)
|
|
19
43
|
let $_put := map:put(
|
|
@@ -26,7 +50,7 @@ let $_ := if (not(exists($document_to_transform))) then
|
|
|
26
50
|
fn:error(xs:QName("FCL_DOCUMENTNOTFOUND"), "No XML document was found to transform")
|
|
27
51
|
) else ()
|
|
28
52
|
|
|
29
|
-
let $return_value := if (xs:boolean($is_published) or $show_unpublished) then
|
|
53
|
+
let $retrieved_value := if (xs:boolean($is_published) or $show_unpublished) then
|
|
30
54
|
xdmp:xslt-invoke($xsl_path,
|
|
31
55
|
$document_to_transform,
|
|
32
56
|
$params
|
|
@@ -34,4 +58,16 @@ let $return_value := if (xs:boolean($is_published) or $show_unpublished) then
|
|
|
34
58
|
else
|
|
35
59
|
()
|
|
36
60
|
|
|
61
|
+
let $return_value := if($query) then
|
|
62
|
+
xdmp:xslt-eval(
|
|
63
|
+
$number_marks_xslt,
|
|
64
|
+
cts:highlight(
|
|
65
|
+
$retrieved_value,
|
|
66
|
+
helper:make-q-query($query),
|
|
67
|
+
<mark>{$cts:text}</mark>
|
|
68
|
+
)
|
|
69
|
+
)
|
|
70
|
+
else
|
|
71
|
+
$retrieved_value
|
|
72
|
+
|
|
37
73
|
return $return_value
|
|
@@ -192,6 +192,7 @@ class XsltDict(MarkLogicAPIDict):
|
|
|
192
192
|
# xslt_transform.xqy
|
|
193
193
|
class XsltTransformDict(MarkLogicAPIDict):
|
|
194
194
|
img_location: Optional[str]
|
|
195
|
+
query: Optional[str]
|
|
195
196
|
show_unpublished: Optional[bool]
|
|
196
197
|
uri: MarkLogicDocumentURIString
|
|
197
198
|
version_uri: Optional[MarkLogicDocumentVersionURIString]
|
{ds_caselaw_marklogic_api_client-17.1.0 → ds_caselaw_marklogic_api_client-17.3.0}/LICENSE.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|