ds-caselaw-marklogic-api-client 22.1.0__py3-none-any.whl → 23.0.1__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- caselawclient/models/documents.py +24 -2
- caselawclient/models/utilities/aws.py +15 -1
- caselawclient/responses/search_response.py +17 -0
- {ds_caselaw_marklogic_api_client-22.1.0.dist-info → ds_caselaw_marklogic_api_client-23.0.1.dist-info}/METADATA +1 -1
- {ds_caselaw_marklogic_api_client-22.1.0.dist-info → ds_caselaw_marklogic_api_client-23.0.1.dist-info}/RECORD +7 -7
- {ds_caselaw_marklogic_api_client-22.1.0.dist-info → ds_caselaw_marklogic_api_client-23.0.1.dist-info}/LICENSE.md +0 -0
- {ds_caselaw_marklogic_api_client-22.1.0.dist-info → ds_caselaw_marklogic_api_client-23.0.1.dist-info}/WHEEL +0 -0
|
@@ -22,6 +22,7 @@ from .utilities import VersionsDict, render_versions
|
|
|
22
22
|
from .utilities.aws import (
|
|
23
23
|
ParserInstructionsDict,
|
|
24
24
|
announce_document_event,
|
|
25
|
+
check_docx_exists,
|
|
25
26
|
delete_documents_from_private_bucket,
|
|
26
27
|
generate_docx_url,
|
|
27
28
|
generate_pdf_url,
|
|
@@ -167,13 +168,17 @@ class Document:
|
|
|
167
168
|
:return: `True` if the document exists, `False` otherwise."""
|
|
168
169
|
return self.api_client.document_exists(self.uri)
|
|
169
170
|
|
|
171
|
+
def docx_exists(self) -> bool:
|
|
172
|
+
"""There is a docx in S3 private bucket for this Document"""
|
|
173
|
+
return check_docx_exists(self.uri)
|
|
174
|
+
|
|
170
175
|
@property
|
|
171
176
|
def best_human_identifier(self) -> Optional[str]:
|
|
172
177
|
"""
|
|
173
178
|
Some identifier that is understood by legal professionals to refer to this legal event
|
|
174
179
|
that is not the name of the document.
|
|
175
180
|
Typically, this will be the neutral citation number, should it exist.
|
|
176
|
-
Should typically overridden in subclasses.
|
|
181
|
+
Should typically be overridden in subclasses.
|
|
177
182
|
"""
|
|
178
183
|
return None
|
|
179
184
|
|
|
@@ -272,10 +277,12 @@ class Document:
|
|
|
272
277
|
|
|
273
278
|
@cached_property
|
|
274
279
|
def transformation_datetime(self) -> Optional[datetime.datetime]:
|
|
280
|
+
"""When was this document successfully parsed or reparsed (date from XML)"""
|
|
275
281
|
return self.get_latest_manifestation_datetime("transform")
|
|
276
282
|
|
|
277
283
|
@cached_property
|
|
278
284
|
def enrichment_datetime(self) -> Optional[datetime.datetime]:
|
|
285
|
+
"""When was this document successfully enriched (date from XML)"""
|
|
279
286
|
return self.get_latest_manifestation_datetime("tna-enriched")
|
|
280
287
|
|
|
281
288
|
@cached_property
|
|
@@ -591,7 +598,7 @@ class Document:
|
|
|
591
598
|
def move(self, new_citation: str) -> None:
|
|
592
599
|
self.api_client.update_document_uri(self.uri, new_citation)
|
|
593
600
|
|
|
594
|
-
def
|
|
601
|
+
def force_reparse(self) -> None:
|
|
595
602
|
"Send an SNS notification that triggers reparsing, also sending all editor-modifiable metadata and URI"
|
|
596
603
|
|
|
597
604
|
now = datetime.datetime.now(datetime.timezone.utc)
|
|
@@ -627,6 +634,21 @@ class Document:
|
|
|
627
634
|
parser_instructions=parser_instructions,
|
|
628
635
|
)
|
|
629
636
|
|
|
637
|
+
def reparse(self) -> bool:
|
|
638
|
+
if self.can_reparse:
|
|
639
|
+
self.force_reparse()
|
|
640
|
+
return True
|
|
641
|
+
return False
|
|
642
|
+
|
|
643
|
+
@cached_property
|
|
644
|
+
def can_reparse(self) -> bool:
|
|
645
|
+
"""
|
|
646
|
+
Is it sensible to reparse this document?
|
|
647
|
+
"""
|
|
648
|
+
if self.docx_exists():
|
|
649
|
+
return True
|
|
650
|
+
return False
|
|
651
|
+
|
|
630
652
|
class XML:
|
|
631
653
|
"""
|
|
632
654
|
Represents the XML of a document, and should contain all methods for interacting with it.
|
|
@@ -78,6 +78,20 @@ def generate_signed_asset_url(key: str) -> str:
|
|
|
78
78
|
)
|
|
79
79
|
|
|
80
80
|
|
|
81
|
+
def check_docx_exists(uri: str) -> bool:
|
|
82
|
+
"""Does the docx for a document URI actually exist?"""
|
|
83
|
+
bucket = env("PRIVATE_ASSET_BUCKET", None)
|
|
84
|
+
s3_key = generate_docx_key(uri)
|
|
85
|
+
client = create_s3_client()
|
|
86
|
+
try:
|
|
87
|
+
client.head_object(Bucket=bucket, Key=s3_key)
|
|
88
|
+
return True
|
|
89
|
+
except botocore.exceptions.ClientError as e:
|
|
90
|
+
if e.response["Error"]["Code"] == "404":
|
|
91
|
+
return False
|
|
92
|
+
raise
|
|
93
|
+
|
|
94
|
+
|
|
81
95
|
def generate_docx_key(uri: str) -> str:
|
|
82
96
|
"""from a canonical caselaw URI (eat/2022/1) return the S3 key of the associated docx"""
|
|
83
97
|
return f'{uri}/{uri.replace("/", "_")}.docx'
|
|
@@ -227,7 +241,7 @@ def request_parse(
|
|
|
227
241
|
.replace("+00:00", "Z"),
|
|
228
242
|
"function": "fcl-judgment-parse-request",
|
|
229
243
|
"producer": "FCL",
|
|
230
|
-
"executionId":
|
|
244
|
+
"executionId": str(uuid.uuid4()),
|
|
231
245
|
"parentExecutionId": None,
|
|
232
246
|
},
|
|
233
247
|
"parameters": {
|
|
@@ -45,3 +45,20 @@ class SearchResponse:
|
|
|
45
45
|
"//search:response/search:result", namespaces=self.NAMESPACES
|
|
46
46
|
)
|
|
47
47
|
return [SearchResult(result, self.client) for result in results]
|
|
48
|
+
|
|
49
|
+
@property
|
|
50
|
+
def facets(self) -> dict[str, str]:
|
|
51
|
+
"""
|
|
52
|
+
Returns search facets from the SearchResponse as a dictionary
|
|
53
|
+
|
|
54
|
+
:return: A flattened dictionary of search facet values
|
|
55
|
+
"""
|
|
56
|
+
# TODO: preserve the name of the facet (e.g. "court", "year")
|
|
57
|
+
results = self.node.xpath(
|
|
58
|
+
"//search:response/search:facet/search:facet-value",
|
|
59
|
+
namespaces={"search": "http://marklogic.com/appservices/search"},
|
|
60
|
+
)
|
|
61
|
+
facets_dictionary = {
|
|
62
|
+
result.attrib["name"]: result.attrib["count"] for result in results
|
|
63
|
+
}
|
|
64
|
+
return facets_dictionary
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ds-caselaw-marklogic-api-client
|
|
3
|
-
Version: 22.1.0
|
|
3
|
+
Version: 23.0.1
|
|
4
4
|
Summary: An API client for interacting with the underlying data in Find Caselaw.
|
|
5
5
|
Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
|
|
6
6
|
Keywords: national archives,caselaw
|
|
@@ -5,16 +5,16 @@ caselawclient/client_helpers/search_helpers.py,sha256=DYgUltPq8fFI2KkLRqH1-8zpbb
|
|
|
5
5
|
caselawclient/content_hash.py,sha256=DF7ujrQPNf1bTSbK0mIIaC5qx6CmF5I0xlQ7uIG0zYI,2236
|
|
6
6
|
caselawclient/errors.py,sha256=3rsbOQ11hIhm7-UABcHNMcs9XgcrIzytAP0koyZBLWM,3195
|
|
7
7
|
caselawclient/models/__init__.py,sha256=kd23EUpvaC7aLHdgk8farqKAQEx3lf7RvNT2jEatvlg,68
|
|
8
|
-
caselawclient/models/documents.py,sha256=
|
|
8
|
+
caselawclient/models/documents.py,sha256=olPW1m6yNRZurlxFWbl-SgPGFPHn0wz_SuUAfeaSt0w,22502
|
|
9
9
|
caselawclient/models/judgments.py,sha256=zufIORXux0OaOWs2bbVSd9xR-HSwrMJe5WAhHwJzzwM,1740
|
|
10
10
|
caselawclient/models/neutral_citation_mixin.py,sha256=G9QS5XZ0tf_VXTxt4Uryy_gZ4eBDsmChqEClLAntIwI,1810
|
|
11
11
|
caselawclient/models/press_summaries.py,sha256=2yx7xOqimjM8NsmueD0Ml3aev2fML3U-UksCRJRqPIM,1726
|
|
12
12
|
caselawclient/models/utilities/__init__.py,sha256=aL1a2nDacPxninETeaVZKwOxZemgvm73IcpWgMNXoGc,1100
|
|
13
|
-
caselawclient/models/utilities/aws.py,sha256=
|
|
13
|
+
caselawclient/models/utilities/aws.py,sha256=Mw244oUPeWB3p6Q7UYRKoy3aS0lE9E6lToE4MSVC5mw,8075
|
|
14
14
|
caselawclient/models/utilities/move.py,sha256=_SKzO1UVXHFIVvWfT4nuCwdov7acp8tYzzEg-vVfUyg,5372
|
|
15
15
|
caselawclient/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
16
|
caselawclient/responses/__init__.py,sha256=2-5NJn_PXPTje_W4dHeHYaNRN6vXK4UcB9eLLNUAKa4,67
|
|
17
|
-
caselawclient/responses/search_response.py,sha256=
|
|
17
|
+
caselawclient/responses/search_response.py,sha256=zXc_OSyGfoh_C1NHBEzjhiGj1tUIPaqgRUh7iA9EdY4,1959
|
|
18
18
|
caselawclient/responses/search_result.py,sha256=BSVE2zBy-sy93fghd_kqwkhgwcnx1mFHY1_HTj-qO70,8096
|
|
19
19
|
caselawclient/responses/xsl/search_match.xsl,sha256=4Sv--MrwBd7J48E9aI7jlFSXGlNi4dBqgzJ3bdMJ_ZU,1018
|
|
20
20
|
caselawclient/search_parameters.py,sha256=nR-UC1aWZbdXzXBrVDaHECU4Ro8Zi4JZATtgrpAVsKY,3342
|
|
@@ -60,7 +60,7 @@ caselawclient/xquery/validate_document.xqy,sha256=PgaDcnqCRJPIVqfmWsNlXmCLNKd21q
|
|
|
60
60
|
caselawclient/xquery/xslt.xqy,sha256=w57wNijH3dkwHkpKeAxqjlghVflQwo8cq6jS_sm-erM,199
|
|
61
61
|
caselawclient/xquery/xslt_transform.xqy,sha256=smyFFxqmtkuOzBd2l7uw6K2oAsYctudrP8omdv_XNAM,2463
|
|
62
62
|
caselawclient/xquery_type_dicts.py,sha256=YOrXbEYJU84S-YwergCI12OL5Wrn_wpqMcqWpsQrKek,5590
|
|
63
|
-
ds_caselaw_marklogic_api_client-
|
|
64
|
-
ds_caselaw_marklogic_api_client-
|
|
65
|
-
ds_caselaw_marklogic_api_client-
|
|
66
|
-
ds_caselaw_marklogic_api_client-
|
|
63
|
+
ds_caselaw_marklogic_api_client-23.0.1.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
|
|
64
|
+
ds_caselaw_marklogic_api_client-23.0.1.dist-info/METADATA,sha256=Q1Rq_cBlSoVl5eVGKrAPYRbQ4qfKD7idunQFOaIV33s,4006
|
|
65
|
+
ds_caselaw_marklogic_api_client-23.0.1.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
|
66
|
+
ds_caselaw_marklogic_api_client-23.0.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|