ds-caselaw-marklogic-api-client 40.0.0__tar.gz → 44.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/PKG-INFO +4 -3
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/pyproject.toml +12 -11
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/Client.py +50 -6
- ds_caselaw_marklogic_api_client-44.3.0/src/caselawclient/client_helpers/__init__.py +35 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/factories.py +3 -1
- ds_caselaw_marklogic_api_client-44.3.0/src/caselawclient/managers/merge/__init__.py +51 -0
- ds_caselaw_marklogic_api_client-44.3.0/src/caselawclient/managers/merge/checks.py +79 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/documents/__init__.py +60 -3
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/documents/body.py +55 -10
- ds_caselaw_marklogic_api_client-44.3.0/src/caselawclient/models/documents/stub.py +50 -0
- ds_caselaw_marklogic_api_client-44.3.0/src/caselawclient/models/documents/templates/judgment.xml +67 -0
- ds_caselaw_marklogic_api_client-44.3.0/src/caselawclient/models/documents/templates/original.xml +68 -0
- ds_caselaw_marklogic_api_client-40.0.0/src/caselawclient/client_helpers/__init__.py → ds_caselaw_marklogic_api_client-44.3.0/src/caselawclient/models/documents/versions.py +0 -35
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/documents/xml.py +4 -1
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/identifiers/__init__.py +4 -1
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/identifiers/collection.py +2 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/utilities/aws.py +42 -1
- ds_caselaw_marklogic_api_client-44.3.0/src/caselawclient/py.typed +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/search_parameters.py +4 -0
- ds_caselaw_marklogic_api_client-44.3.0/src/caselawclient/types.py +150 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xml_helpers.py +18 -2
- ds_caselaw_marklogic_api_client-44.3.0/src/caselawclient/xquery/check_content_hash_unique_by_uri.xqy +15 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_judgment.xqy +2 -12
- ds_caselaw_marklogic_api_client-44.3.0/src/caselawclient/xquery/get_locked_documents.xqy +10 -0
- ds_caselaw_marklogic_api_client-44.3.0/src/caselawclient/xquery/get_pending_parse_for_version_count.xqy +27 -0
- ds_caselaw_marklogic_api_client-40.0.0/src/caselawclient/xquery/get_pending_parse_for_version.xqy → ds_caselaw_marklogic_api_client-44.3.0/src/caselawclient/xquery/get_pending_parse_for_version_documents.xqy +3 -2
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/xslt_transform.xqy +4 -28
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery_type_dicts.py +13 -2
- ds_caselaw_marklogic_api_client-40.0.0/src/caselawclient/types.py +0 -71
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/LICENSE.md +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/README.md +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/client_helpers/search_helpers.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/content_hash.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/errors.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/identifier_resolution.py +0 -0
- /ds_caselaw_marklogic_api_client-40.0.0/src/caselawclient/py.typed → /ds_caselaw_marklogic_api_client-44.3.0/src/caselawclient/managers/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/documents/comparison.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/documents/exceptions.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/documents/statuses.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/documents/transforms/html.xsl +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/identifiers/exceptions.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/identifiers/fclid.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/identifiers/neutral_citation.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/identifiers/press_summary_ncn.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/identifiers/unpacker.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/judgments.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/neutral_citation_mixin.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/parser_logs.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/press_summaries.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/utilities/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/utilities/dates.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/models/utilities/move.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/responses/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/responses/search_response.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/responses/search_result.py +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/responses/xsl/search_match.xsl +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/break_judgment_checkout.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/checkin_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/checkout_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/copy_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/delete_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/document_collections.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/document_exists.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_combined_stats_table.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_components_for_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_highest_enrichment_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_highest_parser_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_judgment_checkout_status.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_judgment_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_last_modified.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_missing_fclid.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_next_document_sequence_number.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_properties_for_search_results.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_property_as_node.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_recently_enriched.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_recently_parsed.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_version_annotation.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/get_version_created.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/insert_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/list_judgment_versions.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/resolve_from_identifier_slug.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/resolve_from_identifier_value.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/set_boolean_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/set_datetime_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/set_metadata_citation.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/set_metadata_court.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/set_metadata_jurisdiction.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/set_metadata_name.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/set_metadata_this_uri.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/set_metadata_work_expression_date.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/set_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/set_property_as_node.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/update_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/update_locked_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/user_has_privilege.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/user_has_role.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/validate_all_documents.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/validate_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xquery/xslt.xqy +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xslt/modify_xml_live.xsl +0 -0
- {ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/src/caselawclient/xslt/sample.xsl +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ds-caselaw-marklogic-api-client
|
|
3
|
-
Version: 40.0.0
|
|
3
|
+
Version: 44.3.0
|
|
4
4
|
Summary: An API client for interacting with the underlying data in Find Caselaw.
|
|
5
5
|
Keywords: national archives,caselaw
|
|
6
6
|
Author: The National Archives
|
|
@@ -9,16 +9,17 @@ Classifier: Programming Language :: Python :: 3
|
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.12
|
|
10
10
|
Classifier: Programming Language :: Python :: 3.13
|
|
11
11
|
Requires-Dist: boto3 (>=1.26.112,<2.0.0)
|
|
12
|
-
Requires-Dist: certifi (>=2025.
|
|
12
|
+
Requires-Dist: certifi (>=2025.11.12,<2025.12.0)
|
|
13
13
|
Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
|
|
14
14
|
Requires-Dist: defusedxml (>=0.7.1,<0.8.0)
|
|
15
15
|
Requires-Dist: django-environ (>=0.12.0)
|
|
16
|
-
Requires-Dist: ds-caselaw-utils (>=2.
|
|
16
|
+
Requires-Dist: ds-caselaw-utils (>=2.10.0,<3.0.0)
|
|
17
17
|
Requires-Dist: idna (>=3.4,<4.0)
|
|
18
18
|
Requires-Dist: lxml (>=6.0.0,<7.0.0)
|
|
19
19
|
Requires-Dist: memoization (>=0.4.0,<0.5.0)
|
|
20
20
|
Requires-Dist: mypy-boto3-s3 (>=1.26.104,<2.0.0)
|
|
21
21
|
Requires-Dist: mypy-boto3-sns (>=1.26.69,<2.0.0)
|
|
22
|
+
Requires-Dist: pydantic (>=2.12.3,<3.0.0)
|
|
22
23
|
Requires-Dist: python-dateutil (>=2.9.0-post.0,<3.0.0)
|
|
23
24
|
Requires-Dist: pytz (>2024)
|
|
24
25
|
Requires-Dist: requests (>=2.28.2,<3.0.0)
|
{ds_caselaw_marklogic_api_client-40.0.0 → ds_caselaw_marklogic_api_client-44.3.0}/pyproject.toml
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "ds-caselaw-marklogic-api-client"
|
|
3
|
-
version = "40.0.0"
|
|
3
|
+
version = "44.3.0"
|
|
4
4
|
description = "An API client for interacting with the underlying data in Find Caselaw."
|
|
5
5
|
authors = ["The National Archives"]
|
|
6
6
|
homepage = "https://github.com/nationalarchives/ds-caselaw-custom-api-client"
|
|
@@ -12,7 +12,7 @@ packages = [
|
|
|
12
12
|
|
|
13
13
|
[tool.poetry.dependencies]
|
|
14
14
|
python = "^3.12.0"
|
|
15
|
-
certifi = ">=2025.
|
|
15
|
+
certifi = ">=2025.11.12,<2025.12.0"
|
|
16
16
|
charset-normalizer = "^3.0.0"
|
|
17
17
|
django-environ = ">=0.12.0"
|
|
18
18
|
idna = "^3.4"
|
|
@@ -20,7 +20,7 @@ requests = "^2.28.2"
|
|
|
20
20
|
requests-toolbelt = ">=0.10.1,<1.1.0"
|
|
21
21
|
memoization = "^0.4.0"
|
|
22
22
|
lxml = "^6.0.0"
|
|
23
|
-
ds-caselaw-utils = "^2.
|
|
23
|
+
ds-caselaw-utils = "^2.10.0"
|
|
24
24
|
boto3 = "^1.26.112"
|
|
25
25
|
typing-extensions = "^4.7.1"
|
|
26
26
|
mypy-boto3-s3 = "^1.26.104"
|
|
@@ -30,22 +30,23 @@ python-dateutil = "^2.9.0-post.0"
|
|
|
30
30
|
saxonche = "^12.5.0"
|
|
31
31
|
sqids = "^0.5.0"
|
|
32
32
|
defusedxml = "^0.7.1"
|
|
33
|
+
pydantic = "^2.12.3"
|
|
33
34
|
|
|
34
35
|
[tool.poetry.group.dev.dependencies]
|
|
35
|
-
coverage = "7.
|
|
36
|
-
pytest = "
|
|
37
|
-
pytest-cov = "
|
|
38
|
-
beautifulsoup4 = "4.
|
|
36
|
+
coverage = "7.13.0"
|
|
37
|
+
pytest = "9.0.2"
|
|
38
|
+
pytest-cov = "7.0.0"
|
|
39
|
+
beautifulsoup4 = "4.14.3"
|
|
39
40
|
responses = "0.25.8"
|
|
40
|
-
python-dotenv = "1.
|
|
41
|
-
time-machine = "
|
|
42
|
-
moto = {version = "5.1.
|
|
41
|
+
python-dotenv = "1.2.1"
|
|
42
|
+
time-machine = "3.1.0"
|
|
43
|
+
moto = {version = "5.1.18", extras = ["all"]}
|
|
43
44
|
|
|
44
45
|
[tool.poetry.group.docs]
|
|
45
46
|
optional = true
|
|
46
47
|
|
|
47
48
|
[tool.poetry.group.docs.dependencies]
|
|
48
|
-
pdoc = "^
|
|
49
|
+
pdoc = "^16.0.0"
|
|
49
50
|
|
|
50
51
|
|
|
51
52
|
[tool.commitizen]
|
|
@@ -21,19 +21,20 @@ from requests.structures import CaseInsensitiveDict
|
|
|
21
21
|
from requests_toolbelt.multipart import decoder
|
|
22
22
|
|
|
23
23
|
from caselawclient import xquery_type_dicts as query_dicts
|
|
24
|
-
from caselawclient.client_helpers import VersionAnnotation
|
|
25
24
|
from caselawclient.identifier_resolution import IdentifierResolutions
|
|
26
25
|
from caselawclient.models.documents import (
|
|
27
26
|
DOCUMENT_COLLECTION_URI_JUDGMENT,
|
|
28
27
|
DOCUMENT_COLLECTION_URI_PRESS_SUMMARY,
|
|
29
28
|
Document,
|
|
30
29
|
)
|
|
30
|
+
from caselawclient.models.documents.versions import VersionAnnotation
|
|
31
31
|
from caselawclient.models.judgments import Judgment
|
|
32
32
|
from caselawclient.models.press_summaries import PressSummary
|
|
33
33
|
from caselawclient.models.utilities import move
|
|
34
34
|
from caselawclient.search_parameters import SearchParameters
|
|
35
|
-
from caselawclient.types import DocumentIdentifierSlug, DocumentIdentifierValue, DocumentURIString
|
|
35
|
+
from caselawclient.types import DocumentIdentifierSlug, DocumentIdentifierValue, DocumentLock, DocumentURIString
|
|
36
36
|
from caselawclient.xquery_type_dicts import (
|
|
37
|
+
CheckContentHashUniqueByUriDict,
|
|
37
38
|
MarkLogicDocumentURIString,
|
|
38
39
|
MarkLogicDocumentVersionURIString,
|
|
39
40
|
MarkLogicPrivilegeURIString,
|
|
@@ -728,6 +729,14 @@ class MarklogicApiClient:
|
|
|
728
729
|
== 0
|
|
729
730
|
)
|
|
730
731
|
|
|
732
|
+
def has_unique_content_hash(self, judgment_uri: DocumentURIString) -> bool:
|
|
733
|
+
"""
|
|
734
|
+
Returns True if the content hash for this document is unique (not shared with other documents).
|
|
735
|
+
"""
|
|
736
|
+
uri = self._format_uri_for_marklogic(judgment_uri)
|
|
737
|
+
vars: CheckContentHashUniqueByUriDict = {"uri": uri}
|
|
738
|
+
return self._eval_and_decode(vars, "check_content_hash_unique_by_uri.xqy") == "true"
|
|
739
|
+
|
|
731
740
|
def eval(
|
|
732
741
|
self,
|
|
733
742
|
xquery_path: str,
|
|
@@ -793,6 +802,8 @@ class MarklogicApiClient:
|
|
|
793
802
|
:param judge:
|
|
794
803
|
:param party:
|
|
795
804
|
:param neutral_citation:
|
|
805
|
+
:param document_name:
|
|
806
|
+
:param consignment_number:
|
|
796
807
|
:param specific_keyword:
|
|
797
808
|
:param order:
|
|
798
809
|
:param date_from:
|
|
@@ -1211,13 +1222,13 @@ class MarklogicApiClient:
|
|
|
1211
1222
|
|
|
1212
1223
|
return (int(table[1][1]), int(table[1][2]))
|
|
1213
1224
|
|
|
1214
|
-
def
|
|
1225
|
+
def get_documents_pending_parse_for_version(
|
|
1215
1226
|
self,
|
|
1216
1227
|
target_version: tuple[int, int],
|
|
1217
1228
|
maximum_records: int = 1000,
|
|
1218
1229
|
) -> list[list[Any]]:
|
|
1219
|
-
"""Retrieve documents which are not yet parsed with a given version."""
|
|
1220
|
-
vars: query_dicts.
|
|
1230
|
+
"""Retrieve a list of documents which are not yet parsed with a given version."""
|
|
1231
|
+
vars: query_dicts.GetPendingParseForVersionDocumentsDict = {
|
|
1221
1232
|
"target_major_version": target_version[0],
|
|
1222
1233
|
"target_minor_version": target_version[1],
|
|
1223
1234
|
"maximum_records": maximum_records,
|
|
@@ -1226,13 +1237,33 @@ class MarklogicApiClient:
|
|
|
1226
1237
|
get_single_string_from_marklogic_response(
|
|
1227
1238
|
self._send_to_eval(
|
|
1228
1239
|
vars,
|
|
1229
|
-
"get_pending_parse_for_version.xqy",
|
|
1240
|
+
"get_pending_parse_for_version_documents.xqy",
|
|
1230
1241
|
),
|
|
1231
1242
|
),
|
|
1232
1243
|
)
|
|
1233
1244
|
|
|
1234
1245
|
return results
|
|
1235
1246
|
|
|
1247
|
+
def get_count_pending_parse_for_version(
|
|
1248
|
+
self,
|
|
1249
|
+
target_version: tuple[int, int],
|
|
1250
|
+
) -> int:
|
|
1251
|
+
"""Get the total number of documents which are not yet parsed with a given version."""
|
|
1252
|
+
vars: query_dicts.GetPendingParseForVersionCountDict = {
|
|
1253
|
+
"target_major_version": target_version[0],
|
|
1254
|
+
"target_minor_version": target_version[1],
|
|
1255
|
+
}
|
|
1256
|
+
results = json.loads(
|
|
1257
|
+
get_single_string_from_marklogic_response(
|
|
1258
|
+
self._send_to_eval(
|
|
1259
|
+
vars,
|
|
1260
|
+
"get_pending_parse_for_version_count.xqy",
|
|
1261
|
+
),
|
|
1262
|
+
),
|
|
1263
|
+
)
|
|
1264
|
+
|
|
1265
|
+
return int(results[1][0])
|
|
1266
|
+
|
|
1236
1267
|
def get_recently_parsed(
|
|
1237
1268
|
self,
|
|
1238
1269
|
) -> list[list[Any]]:
|
|
@@ -1248,6 +1279,19 @@ class MarklogicApiClient:
|
|
|
1248
1279
|
|
|
1249
1280
|
return results
|
|
1250
1281
|
|
|
1282
|
+
def get_locked_documents(
|
|
1283
|
+
self,
|
|
1284
|
+
) -> list[DocumentLock]:
|
|
1285
|
+
"""Retrieve all currently locked documents."""
|
|
1286
|
+
results = [
|
|
1287
|
+
DocumentLock.from_string(lock)
|
|
1288
|
+
for lock in get_multipart_strings_from_marklogic_response(
|
|
1289
|
+
self._send_to_eval({}, "get_locked_documents.xqy")
|
|
1290
|
+
)
|
|
1291
|
+
]
|
|
1292
|
+
|
|
1293
|
+
return sorted(results, key=lambda lock: lock.timestamp)
|
|
1294
|
+
|
|
1251
1295
|
def get_missing_fclid(
|
|
1252
1296
|
self,
|
|
1253
1297
|
maximum_records: int = 50,
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from lxml import etree
|
|
2
|
+
|
|
3
|
+
from caselawclient.xml_helpers import DEFAULT_NAMESPACES
|
|
4
|
+
|
|
5
|
+
from ..models.documents import Document
|
|
6
|
+
from ..models.judgments import Judgment
|
|
7
|
+
from ..models.parser_logs import ParserLog
|
|
8
|
+
from ..models.press_summaries import PressSummary
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class CannotDetermineDocumentType(Exception):
|
|
12
|
+
pass
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_document_type_class(xml: bytes) -> type[Document]:
|
|
16
|
+
"""Attempt to get the type of the document based on the top-level structure of the XML document."""
|
|
17
|
+
|
|
18
|
+
node = etree.fromstring(xml)
|
|
19
|
+
|
|
20
|
+
# If the main node is `<judgment>`, it's a judgment
|
|
21
|
+
if node.xpath("/akn:akomaNtoso/akn:judgment", namespaces=DEFAULT_NAMESPACES):
|
|
22
|
+
return Judgment
|
|
23
|
+
|
|
24
|
+
# If the main node is `<doc name='pressSummary'>`, it's a press summary
|
|
25
|
+
if node.xpath("/akn:akomaNtoso/akn:doc[@name='pressSummary']", namespaces=DEFAULT_NAMESPACES):
|
|
26
|
+
return PressSummary
|
|
27
|
+
|
|
28
|
+
# If the document is a parser error with a root element of `error`, it's not of a special type.
|
|
29
|
+
if node.xpath("/error", namespaces=DEFAULT_NAMESPACES):
|
|
30
|
+
return ParserLog
|
|
31
|
+
|
|
32
|
+
# Otherwise, we don't know for sure. Fail out.
|
|
33
|
+
raise CannotDetermineDocumentType(
|
|
34
|
+
"Unable to determine the Document type by its XML",
|
|
35
|
+
)
|
|
@@ -20,7 +20,7 @@ T = TypeVar("T")
|
|
|
20
20
|
|
|
21
21
|
DEFAULT_DOCUMENT_BODY_XML = """<akomaNtoso xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0" xmlns:uk="https://caselaw.nationalarchives.gov.uk/akn">
|
|
22
22
|
<judgment name="decision">
|
|
23
|
-
<meta/><header
|
|
23
|
+
<meta/><header><p>Header contains text</p></header>
|
|
24
24
|
<judgmentBody>
|
|
25
25
|
<decision>
|
|
26
26
|
<p>This is a document.</p>
|
|
@@ -61,6 +61,8 @@ class DocumentFactory:
|
|
|
61
61
|
"source_name": "Example Uploader",
|
|
62
62
|
"source_email": "uploader@example.com",
|
|
63
63
|
"consignment_reference": "TDR-12345",
|
|
64
|
+
"first_published_datetime": None,
|
|
65
|
+
"has_ever_been_published": False,
|
|
64
66
|
"assigned_to": "",
|
|
65
67
|
"versions": [],
|
|
66
68
|
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import caselawclient.managers.merge.checks as checks
|
|
2
|
+
from caselawclient.models.documents import Document
|
|
3
|
+
from caselawclient.types import SuccessFailureMessageTuple
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _combine_list_of_successfailure_results(
|
|
7
|
+
validations: list[SuccessFailureMessageTuple],
|
|
8
|
+
) -> SuccessFailureMessageTuple:
|
|
9
|
+
"""Given a list of SuccessFailureMessageTuples, combine the success/failure states and any messages into a single new object representing the overall success/failure state."""
|
|
10
|
+
success = True
|
|
11
|
+
messages: list[str] = []
|
|
12
|
+
|
|
13
|
+
for validation in validations:
|
|
14
|
+
if validation.success is False:
|
|
15
|
+
success = False
|
|
16
|
+
|
|
17
|
+
messages += validation.messages
|
|
18
|
+
|
|
19
|
+
return SuccessFailureMessageTuple(success, messages)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class MergeManager:
|
|
23
|
+
@classmethod
|
|
24
|
+
def check_document_is_safe_as_merge_source(cls, source_document: Document) -> SuccessFailureMessageTuple:
|
|
25
|
+
"""
|
|
26
|
+
Is the given document safe to be considered as a merge source?
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
return _combine_list_of_successfailure_results(
|
|
30
|
+
[
|
|
31
|
+
checks.check_document_is_not_version(source_document),
|
|
32
|
+
checks.check_document_has_only_one_version(source_document),
|
|
33
|
+
checks.check_document_has_never_been_published(source_document),
|
|
34
|
+
checks.check_document_is_safe_to_delete(source_document),
|
|
35
|
+
]
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
@classmethod
|
|
39
|
+
def check_source_document_is_safe_to_merge_into_target(
|
|
40
|
+
cls, source_document: Document, target_document: Document
|
|
41
|
+
) -> SuccessFailureMessageTuple:
|
|
42
|
+
"""Is the given source document safe to merge into a given target?"""
|
|
43
|
+
|
|
44
|
+
return _combine_list_of_successfailure_results(
|
|
45
|
+
[
|
|
46
|
+
checks.check_documents_are_not_same_document(source_document, target_document),
|
|
47
|
+
checks.check_document_is_not_version(target_document),
|
|
48
|
+
checks.check_documents_are_same_type(source_document, target_document),
|
|
49
|
+
checks.check_source_document_is_newer_than_target(source_document, target_document),
|
|
50
|
+
]
|
|
51
|
+
)
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from caselawclient.models.documents import Document
|
|
2
|
+
from caselawclient.types import SuccessFailureMessageTuple
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def check_document_is_not_version(document: Document) -> SuccessFailureMessageTuple:
|
|
6
|
+
"""Check that the document URI isn't a specific version"""
|
|
7
|
+
if document.is_version:
|
|
8
|
+
return SuccessFailureMessageTuple(
|
|
9
|
+
False,
|
|
10
|
+
["This document is a specific version, and cannot be used as a merge source"],
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
return SuccessFailureMessageTuple(True, [])
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def check_document_has_only_one_version(document: Document) -> SuccessFailureMessageTuple:
|
|
17
|
+
"""Make sure the document has exactly one version."""
|
|
18
|
+
if len(document.versions) > 1:
|
|
19
|
+
return SuccessFailureMessageTuple(
|
|
20
|
+
False,
|
|
21
|
+
["This document has more than one version"],
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
return SuccessFailureMessageTuple(True, [])
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def check_document_has_never_been_published(document: Document) -> SuccessFailureMessageTuple:
|
|
28
|
+
"""Make sure the document has never been published."""
|
|
29
|
+
if document.has_ever_been_published:
|
|
30
|
+
return SuccessFailureMessageTuple(
|
|
31
|
+
False,
|
|
32
|
+
["This document has previously been published"],
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
return SuccessFailureMessageTuple(True, [])
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def check_document_is_safe_to_delete(document: Document) -> SuccessFailureMessageTuple:
|
|
39
|
+
"""Make sure the document is safe to delete."""
|
|
40
|
+
if not document.safe_to_delete:
|
|
41
|
+
return SuccessFailureMessageTuple(
|
|
42
|
+
False,
|
|
43
|
+
["This document cannot be deleted because it is published"],
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
return SuccessFailureMessageTuple(True, [])
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def check_documents_are_not_same_document(document_one: Document, document_two: Document) -> SuccessFailureMessageTuple:
|
|
50
|
+
"""Check that two documents aren't actually the same"""
|
|
51
|
+
if document_one.uri == document_two.uri:
|
|
52
|
+
return SuccessFailureMessageTuple(
|
|
53
|
+
False,
|
|
54
|
+
["You cannot merge a document with itself"],
|
|
55
|
+
)
|
|
56
|
+
return SuccessFailureMessageTuple(True, [])
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def check_documents_are_same_type(document_one: Document, document_two: Document) -> SuccessFailureMessageTuple:
|
|
60
|
+
"""Check to see if this document is the same type as a target document."""
|
|
61
|
+
if type(document_one) is not type(document_two):
|
|
62
|
+
return SuccessFailureMessageTuple(
|
|
63
|
+
False,
|
|
64
|
+
[
|
|
65
|
+
f"The type of {document_one.uri} ({type(document_one).document_noun}) does not match the type of {document_two.uri} ({type(document_two).document_noun})"
|
|
66
|
+
],
|
|
67
|
+
)
|
|
68
|
+
return SuccessFailureMessageTuple(True, [])
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def check_source_document_is_newer_than_target(
|
|
72
|
+
source_document: Document, target_document: Document
|
|
73
|
+
) -> SuccessFailureMessageTuple:
|
|
74
|
+
"""Check to see if the created datetime of the latest version of this document is newer than the created datetime of the latest version of a target document."""
|
|
75
|
+
if source_document.version_created_datetime < target_document.version_created_datetime:
|
|
76
|
+
return SuccessFailureMessageTuple(
|
|
77
|
+
False, [f"The document at {source_document.uri} is older than the latest version of {target_document.uri}"]
|
|
78
|
+
)
|
|
79
|
+
return SuccessFailureMessageTuple(True, [])
|
|
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any, Optional
|
|
|
7
7
|
from ds_caselaw_utils import courts
|
|
8
8
|
from ds_caselaw_utils.courts import CourtNotFoundException
|
|
9
9
|
from ds_caselaw_utils.types import NeutralCitationString
|
|
10
|
+
from pydantic import TypeAdapter
|
|
10
11
|
from requests_toolbelt.multipart import decoder
|
|
11
12
|
|
|
12
13
|
import caselawclient.models.documents.comparison as comparison
|
|
@@ -16,6 +17,7 @@ from caselawclient.errors import (
|
|
|
16
17
|
OnlySupportedOnVersion,
|
|
17
18
|
)
|
|
18
19
|
from caselawclient.identifier_resolution import IdentifierResolutions
|
|
20
|
+
from caselawclient.models.documents.versions import AnnotationDataDict
|
|
19
21
|
from caselawclient.models.identifiers import Identifier
|
|
20
22
|
from caselawclient.models.identifiers.exceptions import IdentifierValidationException
|
|
21
23
|
from caselawclient.models.identifiers.fclid import FindCaseLawIdentifier, FindCaseLawIdentifierSchema
|
|
@@ -24,6 +26,7 @@ from caselawclient.models.utilities import VersionsDict, extract_version, render
|
|
|
24
26
|
from caselawclient.models.utilities.aws import (
|
|
25
27
|
ParserInstructionsDict,
|
|
26
28
|
announce_document_event,
|
|
29
|
+
are_unpublished_assets_clean,
|
|
27
30
|
check_docx_exists,
|
|
28
31
|
delete_documents_from_private_bucket,
|
|
29
32
|
generate_docx_url,
|
|
@@ -95,6 +98,16 @@ class Document:
|
|
|
95
98
|
True,
|
|
96
99
|
"The court for this {document_noun} is not valid",
|
|
97
100
|
),
|
|
101
|
+
(
|
|
102
|
+
"has_unique_content_hash",
|
|
103
|
+
True,
|
|
104
|
+
"There is another document with identical content",
|
|
105
|
+
),
|
|
106
|
+
(
|
|
107
|
+
"has_only_clean_assets",
|
|
108
|
+
True,
|
|
109
|
+
"An uncleaned asset exists for this document",
|
|
110
|
+
),
|
|
98
111
|
]
|
|
99
112
|
"""
|
|
100
113
|
A list of tuples in the form:
|
|
@@ -245,7 +258,7 @@ class Document:
|
|
|
245
258
|
"""
|
|
246
259
|
if self.is_version:
|
|
247
260
|
raise NotSupportedOnVersion(
|
|
248
|
-
"Cannot get versions of a version for {self.uri}",
|
|
261
|
+
f"Cannot get versions of a version for {self.uri}",
|
|
249
262
|
)
|
|
250
263
|
docs = []
|
|
251
264
|
for version in self.versions:
|
|
@@ -307,10 +320,37 @@ class Document:
|
|
|
307
320
|
|
|
308
321
|
@cached_property
def first_published_datetime(self) -> Optional[datetime.datetime]:
    """
    Return the stored "first published" date and time for this document.

    The value is read through the API client's `get_datetime_property` for this
    document's URI, and is returned exactly as the client provides it.

    :return: The datetime value in the database for "first published".
    """
    stored_value = self.api_client.get_datetime_property(self.uri, "first_published_datetime")
    return stored_value
|
|
311
329
|
|
|
330
|
+
@cached_property
def first_published_datetime_display(self) -> Optional[datetime.datetime]:
    """
    Return the "first published" date and time in a form suitable for display.

    A stored value of 1970-01-01 00:00 UTC is a sentinel meaning the document has been published before but the exact moment is unknown; in that case `None` is returned. Combine this with `has_ever_been_published` to tell "unknown" apart from "never published".

    :return: The datetime value to be displayed to end users for "first published".
    """
    unknown_moment_sentinel = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)
    published_at = self.first_published_datetime

    return None if published_at == unknown_moment_sentinel else published_at
|
|
344
|
+
|
|
312
345
|
@cached_property
def has_ever_been_published(self) -> bool:
    """
    Decide whether this document counts as having ever been published.

    The answer is yes when the document is published right now, or when `first_published_datetime` holds any value at all (the "unknown date" sentinel included).

    :return: A boolean indicating if the document has ever been published.
    """
    currently_published = self.is_published
    if currently_published:
        return currently_published

    return self.first_published_datetime is not None
|
|
315
355
|
|
|
316
356
|
@cached_property
|
|
@@ -325,6 +365,23 @@ class Document:
|
|
|
325
365
|
def annotation(self) -> str:
|
|
326
366
|
return self.api_client.get_version_annotation(self.uri)
|
|
327
367
|
|
|
368
|
+
@cached_property
def structured_annotation(self) -> AnnotationDataDict:
    """
    Return this version's annotation parsed into an `AnnotationDataDict`.

    The raw `annotation` string is validated as JSON against the
    `AnnotationDataDict` type using a pydantic `TypeAdapter`.

    :return: The validated annotation data.
    """
    return TypeAdapter(AnnotationDataDict).validate_json(self.annotation)
|
|
373
|
+
|
|
374
|
+
@cached_property
def has_unique_content_hash(self) -> bool:
    """Report whether no other document in MarkLogic shares this document's content hash.

    The check itself is delegated to the API client, keyed on this document's URI.
    """
    is_unique = self.api_client.has_unique_content_hash(self.uri)
    return is_unique
|
|
378
|
+
|
|
379
|
+
@cached_property
def has_only_clean_assets(self) -> bool:
    """Report whether every non-tar.gz asset associated with this document has been cleaned.

    At present this always answers `True`: the real check is switched off until
    the asset cleaning pipeline is enabled.
    """
    # TODO: Remove this once we have enabled the asset cleaning pipeline.
    return True
    # Intentionally unreachable until the stopgap return above is removed.
    return are_unpublished_assets_clean(self.uri)
|
|
384
|
+
|
|
328
385
|
@cached_property
def version_created_datetime(self) -> datetime.datetime:
    """Return the creation datetime of this document version, as reported by the API client for this URI."""
    created_at = self.api_client.get_version_created_datetime(self.uri)
    return created_at
|
|
@@ -540,14 +597,14 @@ class Document:
|
|
|
540
597
|
"""
|
|
541
598
|
Is it sensible to reparse this document?
|
|
542
599
|
"""
|
|
543
|
-
return self.docx_exists()
|
|
600
|
+
return self.docx_exists() and not self.body.has_external_data
|
|
544
601
|
|
|
545
602
|
@cached_property
|
|
546
603
|
def can_enrich(self) -> bool:
|
|
547
604
|
"""
|
|
548
605
|
Is it possible to enrich this document?
|
|
549
606
|
"""
|
|
550
|
-
return self.body.has_content
|
|
607
|
+
return self.body.has_content and not self.body.has_external_data
|
|
551
608
|
|
|
552
609
|
def validate_identifiers(self) -> SuccessFailureMessageTuple:
    """Run every identifier validation for this document.

    Delegates to the identifiers collection, passing this document's concrete
    class and its API client.

    :return: The combined success/failure result and messages from the validations.
    """
    document_class = type(self)
    return self.identifiers.perform_all_validations(
        document_type=document_class,
        api_client=self.api_client,
    )
|
|
@@ -6,9 +6,11 @@ from typing import Optional
|
|
|
6
6
|
|
|
7
7
|
import pytz
|
|
8
8
|
from ds_caselaw_utils.types import CourtCode
|
|
9
|
+
from lxml import etree
|
|
9
10
|
from saxonche import PySaxonProcessor
|
|
10
11
|
|
|
11
12
|
from caselawclient.models.utilities.dates import parse_string_date_as_utc
|
|
13
|
+
from caselawclient.types import DocumentCategory
|
|
12
14
|
|
|
13
15
|
from .xml import XML
|
|
14
16
|
|
|
@@ -37,6 +39,9 @@ class DocumentBody:
|
|
|
37
39
|
def get_xpath_match_strings(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[str]:
    """Evaluate an XPath expression against this body's XML and return the matches as strings.

    This is a thin pass-through to the wrapped XML object's method of the same name.

    :param xpath: The XPath expression to evaluate.
    :param namespaces: Prefix-to-URI mapping used to resolve prefixes in `xpath`.

    :return: The list of matching strings.
    """
    matched_strings = self._xml.get_xpath_match_strings(xpath, namespaces)
    return matched_strings
|
|
39
41
|
|
|
42
|
+
def get_xpath_nodes(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[etree._Element]:
    """Evaluate an XPath expression against this body's XML and return the matching element nodes.

    This is a thin pass-through to the wrapped XML object's method of the same name.

    :param xpath: The XPath expression to evaluate.
    :param namespaces: Prefix-to-URI mapping used to resolve prefixes in `xpath`.

    :return: The list of matching elements.
    """
    matched_nodes = self._xml.get_xpath_nodes(xpath, namespaces)
    return matched_nodes
|
|
44
|
+
|
|
40
45
|
@cached_property
|
|
41
46
|
def name(self) -> str:
|
|
42
47
|
return self.get_xpath_match_string(
|
|
@@ -51,9 +56,46 @@ class DocumentBody:
|
|
|
51
56
|
def jurisdiction(self) -> str:
|
|
52
57
|
return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()")
|
|
53
58
|
|
|
59
|
+
@cached_property
def categories(self) -> list[DocumentCategory]:
    """Return the document's top-level categories, each carrying its subcategories.

    Categories are read from the `uk:category` elements in the proprietary
    metadata. An element with a `parent` attribute is attached as a subcategory
    of the category whose name equals that attribute; elements without the
    attribute are top-level. Elements with missing or whitespace-only text are
    ignored, and a subcategory whose parent name is not present is dropped.

    :return: Top-level categories, in document order.
    """
    category_nodes = self.get_xpath_nodes(
        "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category",
        DEFAULT_NAMESPACES,
    )

    by_name: dict[str, DocumentCategory] = {}
    children_by_parent: dict[str, list[DocumentCategory]] = {}

    # Pass 1: build one category per usable node and group children under their parent's name.
    for category_node in category_nodes:
        label = category_node.text
        if label is None or not label.strip():
            continue

        entry = DocumentCategory(name=label)
        by_name[label] = entry

        parent_label = category_node.get("parent")
        if parent_label:
            children_by_parent.setdefault(parent_label, []).append(entry)

    # Pass 2: attach each group of children to its parent, when that parent exists.
    for parent_label, children in children_by_parent.items():
        if parent_label in by_name:
            by_name[parent_label].subcategories.extend(children)

    # Pass 3: collect the nodes without a `parent` attribute, preserving document order.
    top_level_categories: list[DocumentCategory] = []
    for category_node in category_nodes:
        if category_node.get("parent") is not None:
            continue
        label = category_node.text
        if label and label in by_name:
            top_level_categories.append(by_name[label])

    return top_level_categories
|
|
92
|
+
|
|
93
|
+
# NOTE: Deprecated - use the `categories` property instead
|
|
54
94
|
@cached_property
|
|
55
95
|
def category(self) -> Optional[str]:
|
|
56
|
-
return self.get_xpath_match_string(
|
|
96
|
+
return self.get_xpath_match_string(
|
|
97
|
+
"/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category[not(@parent)][1]/text()"
|
|
98
|
+
)
|
|
57
99
|
|
|
58
100
|
@cached_property
|
|
59
101
|
def case_number(self) -> Optional[str]:
|
|
@@ -134,15 +176,18 @@ class DocumentBody:
|
|
|
134
176
|
@cached_property
|
|
135
177
|
def has_content(self) -> bool:
|
|
136
178
|
"""If we do not have a word document, the XML will not contain
|
|
137
|
-
the contents of the judgment, but will contain a preamble
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
179
|
+
the contents of the judgment, but will contain a preamble (a header if a judgment or a preface if a press summary)"""
|
|
180
|
+
return bool(
|
|
181
|
+
self._xml.xml_as_tree.xpath("//akn:header[normalize-space(string(.))]", namespaces=DEFAULT_NAMESPACES)
|
|
182
|
+
or self._xml.xml_as_tree.xpath("//akn:preface[normalize-space(string(.))]", namespaces=DEFAULT_NAMESPACES)
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
@cached_property
def has_external_data(self) -> bool:
    """Does the XML hold data that did not come from the source document itself,
    for example fields populated from a spreadsheet?

    As a stopgap while that data is not being imported, this is answered by
    checking whether any `uk:party` element is present."""
    party_nodes = self._xml.xml_as_tree.xpath("//uk:party", namespaces=DEFAULT_NAMESPACES)
    return bool(party_nodes)
|
|
146
191
|
|
|
147
192
|
@cache
|
|
148
193
|
def content_html(self, image_prefix: str) -> Optional[str]:
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Literal
|
|
3
|
+
|
|
4
|
+
from ds_caselaw_utils.courts import courts
|
|
5
|
+
from ds_caselaw_utils.types import CourtCode
|
|
6
|
+
from jinja2 import StrictUndefined, Template
|
|
7
|
+
from typing_extensions import TypedDict
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PartyData(TypedDict):
    # One party to a case: the side they are on, and their name.
    role: Literal["Claimant", "Defendant"]
    name: str
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class EditorStubData(TypedDict):
    # The editor-supplied values needed to build a stub document.
    decision_date: str  # precise to the day
    transform_datetime: str  # precise to the second
    court_code: str
    title: str
    year: str
    case_numbers: list[str]  # original note: "can be none" - presumably the list may be empty; confirm
    parties: list[PartyData]  # each entry is a role (claimant or defendant) and a name
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class RendererStubData(EditorStubData):
    # Editor data plus the court details the template renderer needs.
    court_url: str  # expected to be filled in from the ds_caselaw_utils courts data
    court_full_name: str  # likewise filled in from the courts data
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def add_other_stub_fields(editor_data: EditorStubData) -> RendererStubData:
    """Extend editor-supplied stub data with the court details needed for rendering.

    The court is looked up by the upper-cased `court_code`; its identifier IRI
    and long name are added as `court_url` and `court_full_name`.

    :param editor_data: The values provided by the editor. Not modified.

    :return: A new dict holding every editor value plus the two court fields.
    """
    normalised_code = CourtCode(editor_data["court_code"].upper())
    court = courts.get_court_by_code(normalised_code)

    renderer_data: RendererStubData = {
        **editor_data,
        "court_url": court.identifier_iri,
        "court_full_name": court.long_name,
    }
    return renderer_data
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def render_stub_xml(editor_data: EditorStubData) -> bytes:
    """Render the judgment stub template with the given data and return it as UTF-8 bytes.

    The editor data is first extended with court details, then used to render
    the `judgment.xml` Jinja2 template that ships inside the package. The
    template is rendered with `StrictUndefined`, so a template variable with no
    corresponding value raises an error instead of rendering as empty.

    :param editor_data: The values provided by the editor.

    :return: The rendered XML document, encoded as UTF-8.
    """
    render_data = add_other_stub_fields(editor_data)

    # Imported inside the function, not at module level - presumably to avoid a
    # circular import with caselawclient.Client; confirm before moving it.
    from caselawclient.Client import ROOT_DIR

    judgment_path = Path(ROOT_DIR) / "models" / "documents" / "templates" / "judgment.xml"

    # Read explicitly as UTF-8 so decoding does not depend on the platform's
    # locale encoding; the output below is encoded as UTF-8 as well.
    template_source = judgment_path.read_text(encoding="utf-8")

    # NOTE(review): the template is rendered without autoescaping, so values such
    # as the title or party names are inserted as-is. Confirm that judgment.xml
    # escapes them, otherwise characters like "&" or "<" will produce invalid XML.
    rendered_xml = Template(template_source, undefined=StrictUndefined).render(render_data)

    return rendered_xml.encode("utf-8")
|