ds-caselaw-marklogic-api-client 30.0.0__tar.gz → 44.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ds-caselaw-marklogic-api-client might be problematic. Click here for more details.
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/PKG-INFO +14 -11
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/README.md +3 -1
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/pyproject.toml +20 -16
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/Client.py +107 -37
- ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/__init__.py +63 -0
- ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/client_helpers/__init__.py +35 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/factories.py +82 -17
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/identifier_resolution.py +6 -6
- ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/managers/merge/__init__.py +51 -0
- ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/managers/merge/checks.py +79 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/documents/__init__.py +200 -63
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/documents/body.py +72 -9
- ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/models/documents/comparison.py +42 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/documents/exceptions.py +4 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/documents/transforms/html.xsl +2 -2
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/documents/xml.py +30 -1
- ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/models/identifiers/__init__.py +216 -0
- ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/models/identifiers/collection.py +172 -0
- ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/models/identifiers/exceptions.py +6 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/identifiers/fclid.py +9 -4
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/identifiers/neutral_citation.py +32 -6
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/identifiers/press_summary_ncn.py +8 -4
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/identifiers/unpacker.py +22 -14
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/judgments.py +7 -0
- ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/models/parser_logs.py +14 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/press_summaries.py +6 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/utilities/__init__.py +6 -7
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/utilities/aws.py +38 -6
- ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/py.typed +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/responses/search_result.py +41 -6
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/search_parameters.py +6 -0
- ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/types.py +108 -0
- ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/xml_helpers.py +41 -0
- ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/xquery/check_content_hash_unique_by_uri.xqy +15 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_judgment.xqy +2 -12
- ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/xquery/get_missing_fclid.xqy +25 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/insert_document.xqy +2 -6
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/resolve_from_identifier_slug.xqy +2 -3
- ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/xquery/set_datetime_property.xqy +11 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/update_locked_judgment.xqy +1 -2
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/xslt_transform.xqy +4 -28
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery_type_dicts.py +22 -4
- ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/xslt/modify_xml_live.xsl +70 -0
- ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/xslt/sample.xsl +26 -0
- ds_caselaw_marklogic_api_client-30.0.0/src/caselawclient/__init__.py +0 -40
- ds_caselaw_marklogic_api_client-30.0.0/src/caselawclient/models/identifiers/__init__.py +0 -185
- ds_caselaw_marklogic_api_client-30.0.0/src/caselawclient/types.py +0 -24
- ds_caselaw_marklogic_api_client-30.0.0/src/caselawclient/xml_helpers.py +0 -20
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/LICENSE.md +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/client_helpers/search_helpers.py +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/content_hash.py +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/errors.py +0 -0
- /ds_caselaw_marklogic_api_client-30.0.0/src/caselawclient/py.typed → /ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/managers/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/documents/statuses.py +0 -0
- /ds_caselaw_marklogic_api_client-30.0.0/src/caselawclient/client_helpers/__init__.py → /ds_caselaw_marklogic_api_client-44.0.1/src/caselawclient/models/documents/versions.py +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/neutral_citation_mixin.py +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/utilities/dates.py +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/models/utilities/move.py +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/responses/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/responses/search_response.py +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/responses/xsl/search_match.xsl +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/break_judgment_checkout.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/checkin_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/checkout_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/copy_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/delete_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/document_collections.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/document_exists.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_combined_stats_table.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_components_for_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_highest_enrichment_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_highest_parser_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_judgment_checkout_status.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_judgment_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_last_modified.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_next_document_sequence_number.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_pending_parse_for_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_properties_for_search_results.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_property_as_node.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_recently_enriched.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_recently_parsed.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_version_annotation.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/get_version_created.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/list_judgment_versions.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/resolve_from_identifier_value.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_boolean_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_metadata_citation.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_metadata_court.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_metadata_jurisdiction.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_metadata_name.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_metadata_this_uri.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_metadata_work_expression_date.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/set_property_as_node.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/update_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/user_has_privilege.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/user_has_role.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/validate_all_documents.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/validate_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/src/caselawclient/xquery/xslt.xqy +0 -0
|
@@ -1,32 +1,33 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: ds-caselaw-marklogic-api-client
|
|
3
|
-
Version:
|
|
3
|
+
Version: 44.0.1
|
|
4
4
|
Summary: An API client for interacting with the underlying data in Find Caselaw.
|
|
5
|
-
Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
|
|
6
5
|
Keywords: national archives,caselaw
|
|
7
6
|
Author: The National Archives
|
|
8
|
-
Requires-Python: >=3.
|
|
7
|
+
Requires-Python: >=3.12.0,<4.0.0
|
|
9
8
|
Classifier: Programming Language :: Python :: 3
|
|
10
|
-
Classifier: Programming Language :: Python :: 3.
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
11
|
Requires-Dist: boto3 (>=1.26.112,<2.0.0)
|
|
14
|
-
Requires-Dist: certifi (>=
|
|
12
|
+
Requires-Dist: certifi (>=2025.11.12,<2025.12.0)
|
|
15
13
|
Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
|
|
14
|
+
Requires-Dist: defusedxml (>=0.7.1,<0.8.0)
|
|
16
15
|
Requires-Dist: django-environ (>=0.12.0)
|
|
17
16
|
Requires-Dist: ds-caselaw-utils (>=2.0.0,<3.0.0)
|
|
18
17
|
Requires-Dist: idna (>=3.4,<4.0)
|
|
19
|
-
Requires-Dist: lxml (>=
|
|
18
|
+
Requires-Dist: lxml (>=6.0.0,<7.0.0)
|
|
20
19
|
Requires-Dist: memoization (>=0.4.0,<0.5.0)
|
|
21
20
|
Requires-Dist: mypy-boto3-s3 (>=1.26.104,<2.0.0)
|
|
22
21
|
Requires-Dist: mypy-boto3-sns (>=1.26.69,<2.0.0)
|
|
22
|
+
Requires-Dist: pydantic (>=2.12.3,<3.0.0)
|
|
23
23
|
Requires-Dist: python-dateutil (>=2.9.0-post.0,<3.0.0)
|
|
24
|
-
Requires-Dist: pytz (
|
|
24
|
+
Requires-Dist: pytz (>2024)
|
|
25
25
|
Requires-Dist: requests (>=2.28.2,<3.0.0)
|
|
26
26
|
Requires-Dist: requests-toolbelt (>=0.10.1,<1.1.0)
|
|
27
27
|
Requires-Dist: saxonche (>=12.5.0,<13.0.0)
|
|
28
28
|
Requires-Dist: sqids (>=0.5.0,<0.6.0)
|
|
29
29
|
Requires-Dist: typing-extensions (>=4.7.1,<5.0.0)
|
|
30
|
+
Project-URL: Homepage, https://github.com/nationalarchives/ds-caselaw-custom-api-client
|
|
30
31
|
Description-Content-Type: text/markdown
|
|
31
32
|
|
|
32
33
|
# The National Archives: Find Case Law
|
|
@@ -35,7 +36,9 @@ This repository is part of the [Find Case Law](https://caselaw.nationalarchives.
|
|
|
35
36
|
|
|
36
37
|
# MarkLogic API Client
|
|
37
38
|
|
|
38
|
-
[](https://pypi.org/project/ds-caselaw-marklogic-api-client/)
|
|
39
|
+
[](https://pypi.org/project/ds-caselaw-marklogic-api-client/)
|
|
40
|
+

|
|
41
|
+

|
|
39
42
|
|
|
40
43
|
This is an API Client for connecting to Marklogic for The National Archive's Caselaw site.
|
|
41
44
|
|
|
@@ -4,7 +4,9 @@ This repository is part of the [Find Case Law](https://caselaw.nationalarchives.
|
|
|
4
4
|
|
|
5
5
|
# MarkLogic API Client
|
|
6
6
|
|
|
7
|
-
[](https://pypi.org/project/ds-caselaw-marklogic-api-client/)
|
|
7
|
+
[](https://pypi.org/project/ds-caselaw-marklogic-api-client/)
|
|
8
|
+

|
|
9
|
+

|
|
8
10
|
|
|
9
11
|
This is an API Client for connecting to Marklogic for The National Archive's Caselaw site.
|
|
10
12
|
|
{ds_caselaw_marklogic_api_client-30.0.0 → ds_caselaw_marklogic_api_client-44.0.1}/pyproject.toml
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "ds-caselaw-marklogic-api-client"
|
|
3
|
-
version = "
|
|
3
|
+
version = "44.0.1"
|
|
4
4
|
description = "An API client for interacting with the underlying data in Find Caselaw."
|
|
5
5
|
authors = ["The National Archives"]
|
|
6
6
|
homepage = "https://github.com/nationalarchives/ds-caselaw-custom-api-client"
|
|
@@ -11,39 +11,42 @@ packages = [
|
|
|
11
11
|
]
|
|
12
12
|
|
|
13
13
|
[tool.poetry.dependencies]
|
|
14
|
-
python = "^3.
|
|
15
|
-
certifi = ">=
|
|
14
|
+
python = "^3.12.0"
|
|
15
|
+
certifi = ">=2025.11.12,<2025.12.0"
|
|
16
16
|
charset-normalizer = "^3.0.0"
|
|
17
17
|
django-environ = ">=0.12.0"
|
|
18
18
|
idna = "^3.4"
|
|
19
19
|
requests = "^2.28.2"
|
|
20
20
|
requests-toolbelt = ">=0.10.1,<1.1.0"
|
|
21
21
|
memoization = "^0.4.0"
|
|
22
|
-
lxml = "^
|
|
22
|
+
lxml = "^6.0.0"
|
|
23
23
|
ds-caselaw-utils = "^2.0.0"
|
|
24
24
|
boto3 = "^1.26.112"
|
|
25
25
|
typing-extensions = "^4.7.1"
|
|
26
26
|
mypy-boto3-s3 = "^1.26.104"
|
|
27
27
|
mypy-boto3-sns = "^1.26.69"
|
|
28
|
-
pytz = "
|
|
28
|
+
pytz = ">2024"
|
|
29
29
|
python-dateutil = "^2.9.0-post.0"
|
|
30
30
|
saxonche = "^12.5.0"
|
|
31
31
|
sqids = "^0.5.0"
|
|
32
|
+
defusedxml = "^0.7.1"
|
|
33
|
+
pydantic = "^2.12.3"
|
|
32
34
|
|
|
33
35
|
[tool.poetry.group.dev.dependencies]
|
|
34
|
-
coverage = "
|
|
35
|
-
pytest = "
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
36
|
+
coverage = "7.12.0"
|
|
37
|
+
pytest = "9.0.1"
|
|
38
|
+
pytest-cov = "7.0.0"
|
|
39
|
+
beautifulsoup4 = "4.14.2"
|
|
40
|
+
responses = "0.25.8"
|
|
41
|
+
python-dotenv = "1.2.1"
|
|
42
|
+
time-machine = "2.19.0"
|
|
43
|
+
moto = {version = "5.1.17", extras = ["all"]}
|
|
41
44
|
|
|
42
45
|
[tool.poetry.group.docs]
|
|
43
46
|
optional = true
|
|
44
47
|
|
|
45
48
|
[tool.poetry.group.docs.dependencies]
|
|
46
|
-
pdoc = "^
|
|
49
|
+
pdoc = "^16.0.0"
|
|
47
50
|
|
|
48
51
|
|
|
49
52
|
[tool.commitizen]
|
|
@@ -69,14 +72,15 @@ line-length = 120
|
|
|
69
72
|
|
|
70
73
|
[tool.ruff.lint]
|
|
71
74
|
ignore = ["E501", "G004", "PLR2004", "RUF005", "RUF012", "UP040"] # longlines, fstrings in logs, magic values, consider not concat, mutable classbits, type instead of TypeAlias
|
|
72
|
-
extend-select = ["W", "I", "SLF", "SIM"]
|
|
73
|
-
# extend-select = [ "B", "Q", "
|
|
75
|
+
extend-select = ["W", "I", "SLF", "SIM", "C90", "S"]
|
|
76
|
+
# extend-select = [ "B", "Q", "I", "UP", "YTT", "ASYNC", "BLE", "A", "COM", "C4", "DTZ", "T10", "DJ", "EM", "EXE", "FA",
|
|
74
77
|
# "ISC", "ICN", "G", "INP", "PIE", "T20", "PYI", "PT", "Q", "RSE", "RET", "SLOT", "TID", "TCH", "INT", "PTH",
|
|
75
78
|
# "FIX", "PGH", "PL", "TRY", "FLY", "PERF", "RUF"]
|
|
76
79
|
unfixable = ["ERA"]
|
|
77
80
|
|
|
78
81
|
[tool.ruff.lint.extend-per-file-ignores]
|
|
79
|
-
"tests/*" = ["S101"]
|
|
82
|
+
"tests/*" = ["S101"] # `assert` is fine in tests
|
|
83
|
+
"smoketest/*" = ["S101"] # `assert` is fine in tests
|
|
80
84
|
"tests/client/test_client.py" = ["SLF001"] # TODO: This really shouldn't be the case, but it's not important to fix right now.
|
|
81
85
|
|
|
82
86
|
# things skipped:
|
|
@@ -7,11 +7,13 @@ import warnings
|
|
|
7
7
|
from datetime import datetime, time, timedelta
|
|
8
8
|
from pathlib import Path
|
|
9
9
|
from typing import Any, Optional, Type, Union
|
|
10
|
-
from xml.etree import
|
|
11
|
-
from xml.etree.ElementTree import Element, ParseError, fromstring
|
|
10
|
+
from xml.etree.ElementTree import Element
|
|
12
11
|
|
|
13
12
|
import environ
|
|
14
13
|
import requests
|
|
14
|
+
from dateutil.parser import isoparse
|
|
15
|
+
from defusedxml import ElementTree
|
|
16
|
+
from defusedxml.ElementTree import ParseError, fromstring
|
|
15
17
|
from ds_caselaw_utils.types import NeutralCitationString
|
|
16
18
|
from lxml import etree
|
|
17
19
|
from requests.auth import HTTPBasicAuth
|
|
@@ -19,19 +21,20 @@ from requests.structures import CaseInsensitiveDict
|
|
|
19
21
|
from requests_toolbelt.multipart import decoder
|
|
20
22
|
|
|
21
23
|
from caselawclient import xquery_type_dicts as query_dicts
|
|
22
|
-
from caselawclient.client_helpers import VersionAnnotation
|
|
23
24
|
from caselawclient.identifier_resolution import IdentifierResolutions
|
|
24
25
|
from caselawclient.models.documents import (
|
|
25
26
|
DOCUMENT_COLLECTION_URI_JUDGMENT,
|
|
26
27
|
DOCUMENT_COLLECTION_URI_PRESS_SUMMARY,
|
|
27
28
|
Document,
|
|
28
29
|
)
|
|
30
|
+
from caselawclient.models.documents.versions import VersionAnnotation
|
|
29
31
|
from caselawclient.models.judgments import Judgment
|
|
30
32
|
from caselawclient.models.press_summaries import PressSummary
|
|
31
33
|
from caselawclient.models.utilities import move
|
|
32
34
|
from caselawclient.search_parameters import SearchParameters
|
|
33
|
-
from caselawclient.types import DocumentURIString
|
|
35
|
+
from caselawclient.types import DocumentIdentifierSlug, DocumentIdentifierValue, DocumentURIString
|
|
34
36
|
from caselawclient.xquery_type_dicts import (
|
|
37
|
+
CheckContentHashUniqueByUriDict,
|
|
35
38
|
MarkLogicDocumentURIString,
|
|
36
39
|
MarkLogicDocumentVersionURIString,
|
|
37
40
|
MarkLogicPrivilegeURIString,
|
|
@@ -55,6 +58,11 @@ from .errors import (
|
|
|
55
58
|
)
|
|
56
59
|
|
|
57
60
|
env = environ.Env()
|
|
61
|
+
|
|
62
|
+
# Requests timeouts: https://requests.readthedocs.io/en/latest/user/advanced/
|
|
63
|
+
CONNECT_TIMEOUT = float(os.environ.get("CONNECT_TIMEOUT", "3.05"))
|
|
64
|
+
READ_TIMEOUT = float(os.environ.get("READ_TIMEOUT", "10.0"))
|
|
65
|
+
|
|
58
66
|
ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
|
|
59
67
|
DEFAULT_XSL_TRANSFORM = "accessible-html.xsl"
|
|
60
68
|
|
|
@@ -77,12 +85,6 @@ class MultipartResponseLongerThanExpected(Exception):
|
|
|
77
85
|
"""
|
|
78
86
|
|
|
79
87
|
|
|
80
|
-
class DocumentHasNoTypeCollection(Exception):
|
|
81
|
-
"""
|
|
82
|
-
A MarkLogic document is not part of a collection which identifies its document type.
|
|
83
|
-
"""
|
|
84
|
-
|
|
85
|
-
|
|
86
88
|
def get_multipart_strings_from_marklogic_response(
|
|
87
89
|
response: requests.Response,
|
|
88
90
|
) -> list[str]:
|
|
@@ -175,6 +177,7 @@ class MarklogicApiClient:
|
|
|
175
177
|
error_code_classes: dict[str, Type[MarklogicAPIError]] = {
|
|
176
178
|
"XDMP-DOCNOTFOUND": MarklogicResourceNotFoundError,
|
|
177
179
|
"XDMP-LOCKCONFLICT": MarklogicResourceLockedError,
|
|
180
|
+
"XDMP-LOCKED": MarklogicResourceLockedError,
|
|
178
181
|
"DLS-UNMANAGED": MarklogicResourceUnmanagedError,
|
|
179
182
|
"DLS-NOTCHECKEDOUT": MarklogicResourceNotCheckedOutError,
|
|
180
183
|
"DLS-CHECKOUTCONFLICT": MarklogicCheckoutConflictError,
|
|
@@ -239,9 +242,7 @@ class MarklogicApiClient:
|
|
|
239
242
|
return Judgment
|
|
240
243
|
if DOCUMENT_COLLECTION_URI_PRESS_SUMMARY in collections:
|
|
241
244
|
return PressSummary
|
|
242
|
-
|
|
243
|
-
f"The document at URI {uri} is not part of a valid document type collection.",
|
|
244
|
-
)
|
|
245
|
+
return Document
|
|
245
246
|
|
|
246
247
|
def _get_error_code_class(self, error_code: str) -> Type[MarklogicAPIError]:
|
|
247
248
|
"""
|
|
@@ -266,10 +267,12 @@ class MarklogicApiClient:
|
|
|
266
267
|
return "Unknown error, Marklogic returned a null or empty response"
|
|
267
268
|
try:
|
|
268
269
|
xml = fromstring(content_as_xml)
|
|
269
|
-
return
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
270
|
+
return str(
|
|
271
|
+
xml.find(
|
|
272
|
+
"message-code",
|
|
273
|
+
namespaces={"": "http://marklogic.com/xdmp/error"},
|
|
274
|
+
).text
|
|
275
|
+
)
|
|
273
276
|
except (ParseError, TypeError, AttributeError):
|
|
274
277
|
return "Unknown error, Marklogic returned a null or empty response"
|
|
275
278
|
|
|
@@ -322,11 +325,13 @@ class MarklogicApiClient:
|
|
|
322
325
|
self,
|
|
323
326
|
vars: query_dicts.MarkLogicAPIDict,
|
|
324
327
|
xquery_file_name: str,
|
|
328
|
+
timeout: tuple[float, float] = (CONNECT_TIMEOUT, READ_TIMEOUT),
|
|
325
329
|
) -> requests.Response:
|
|
326
330
|
return self.eval(
|
|
327
331
|
self._xquery_path(xquery_file_name),
|
|
328
332
|
vars=json.dumps(vars),
|
|
329
333
|
accept_header="application/xml",
|
|
334
|
+
timeout=timeout,
|
|
330
335
|
)
|
|
331
336
|
|
|
332
337
|
def _eval_and_decode(
|
|
@@ -572,6 +577,7 @@ class MarklogicApiClient:
|
|
|
572
577
|
self,
|
|
573
578
|
document_uri: DocumentURIString,
|
|
574
579
|
document_xml: Element,
|
|
580
|
+
document_type: type[Document],
|
|
575
581
|
annotation: VersionAnnotation,
|
|
576
582
|
) -> requests.Response:
|
|
577
583
|
"""
|
|
@@ -579,6 +585,7 @@ class MarklogicApiClient:
|
|
|
579
585
|
|
|
580
586
|
:param document_uri: The URI to insert the document at
|
|
581
587
|
:param document_xml: The XML of the document to insert
|
|
588
|
+
:param document_type: The type class of the document
|
|
582
589
|
:param annotation: Annotations to record alongside this version
|
|
583
590
|
|
|
584
591
|
:return: The response object from MarkLogic
|
|
@@ -592,6 +599,7 @@ class MarklogicApiClient:
|
|
|
592
599
|
|
|
593
600
|
vars: query_dicts.InsertDocumentDict = {
|
|
594
601
|
"uri": uri,
|
|
602
|
+
"type_collection": document_type.type_collection_name,
|
|
595
603
|
"document": xml.decode("utf-8"),
|
|
596
604
|
"annotation": annotation.as_json,
|
|
597
605
|
}
|
|
@@ -687,10 +695,12 @@ class MarklogicApiClient:
|
|
|
687
695
|
if content == "":
|
|
688
696
|
return None
|
|
689
697
|
response_xml = ElementTree.fromstring(content)
|
|
690
|
-
return
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
698
|
+
return str(
|
|
699
|
+
response_xml.find(
|
|
700
|
+
"dls:annotation",
|
|
701
|
+
namespaces={"dls": "http://marklogic.com/xdmp/dls"},
|
|
702
|
+
).text
|
|
703
|
+
)
|
|
694
704
|
|
|
695
705
|
def get_judgment_version(
|
|
696
706
|
self,
|
|
@@ -719,11 +729,20 @@ class MarklogicApiClient:
|
|
|
719
729
|
== 0
|
|
720
730
|
)
|
|
721
731
|
|
|
732
|
+
def has_unique_content_hash(self, judgment_uri: DocumentURIString) -> bool:
|
|
733
|
+
"""
|
|
734
|
+
Returns True if the content hash for this document is unique (not shared with other documents).
|
|
735
|
+
"""
|
|
736
|
+
uri = self._format_uri_for_marklogic(judgment_uri)
|
|
737
|
+
vars: CheckContentHashUniqueByUriDict = {"uri": uri}
|
|
738
|
+
return self._eval_and_decode(vars, "check_content_hash_unique_by_uri.xqy") == "true"
|
|
739
|
+
|
|
722
740
|
def eval(
|
|
723
741
|
self,
|
|
724
742
|
xquery_path: str,
|
|
725
743
|
vars: str,
|
|
726
744
|
accept_header: str = "multipart/mixed",
|
|
745
|
+
timeout: tuple[float, float] = (CONNECT_TIMEOUT, READ_TIMEOUT),
|
|
727
746
|
) -> requests.Response:
|
|
728
747
|
headers = {
|
|
729
748
|
"Content-type": "application/x-www-form-urlencoded",
|
|
@@ -743,6 +762,7 @@ class MarklogicApiClient:
|
|
|
743
762
|
url=self._path_to_request_url(path),
|
|
744
763
|
headers=headers,
|
|
745
764
|
data=data,
|
|
765
|
+
timeout=timeout,
|
|
746
766
|
)
|
|
747
767
|
# Raise relevant exception for an erroneous response
|
|
748
768
|
self._raise_for_status(response)
|
|
@@ -782,6 +802,8 @@ class MarklogicApiClient:
|
|
|
782
802
|
:param judge:
|
|
783
803
|
:param party:
|
|
784
804
|
:param neutral_citation:
|
|
805
|
+
:param document_name:
|
|
806
|
+
:param consignment_number:
|
|
785
807
|
:param specific_keyword:
|
|
786
808
|
:param order:
|
|
787
809
|
:param date_from:
|
|
@@ -938,12 +960,50 @@ class MarklogicApiClient:
|
|
|
938
960
|
"value": string_value,
|
|
939
961
|
"name": name,
|
|
940
962
|
}
|
|
963
|
+
"""
|
|
964
|
+
Set a property within MarkLogic which is specifically a boolean.
|
|
965
|
+
|
|
966
|
+
Since XML has no concept of boolean, the actual value in the database is set to `"true"` or `"false"`.
|
|
967
|
+
"""
|
|
941
968
|
return self._send_to_eval(vars, "set_boolean_property.xqy")
|
|
942
969
|
|
|
943
970
|
def get_boolean_property(self, judgment_uri: DocumentURIString, name: str) -> bool:
|
|
971
|
+
"""
|
|
972
|
+
Get a property from MarkLogic which is specifically a boolean.
|
|
973
|
+
|
|
974
|
+
:return: `True` if the property exists and has a value of `"true"`, otherwise `False`
|
|
975
|
+
"""
|
|
944
976
|
content = self.get_property(judgment_uri, name)
|
|
945
977
|
return content == "true"
|
|
946
978
|
|
|
979
|
+
def set_datetime_property(
|
|
980
|
+
self,
|
|
981
|
+
judgment_uri: DocumentURIString,
|
|
982
|
+
name: str,
|
|
983
|
+
value: datetime,
|
|
984
|
+
) -> requests.Response:
|
|
985
|
+
"""Set a property within MarkLogic which is specifically a datetime."""
|
|
986
|
+
uri = self._format_uri_for_marklogic(judgment_uri)
|
|
987
|
+
vars: query_dicts.SetDatetimePropertyDict = {
|
|
988
|
+
"uri": uri,
|
|
989
|
+
"value": value.isoformat(),
|
|
990
|
+
"name": name,
|
|
991
|
+
}
|
|
992
|
+
return self._send_to_eval(vars, "set_datetime_property.xqy")
|
|
993
|
+
|
|
994
|
+
def get_datetime_property(self, judgment_uri: DocumentURIString, name: str) -> Optional[datetime]:
|
|
995
|
+
"""
|
|
996
|
+
Get a property from MarkLogic which is specifically a datetime.
|
|
997
|
+
|
|
998
|
+
:return: A datetime with the value of the property, or `None` if it does not exist
|
|
999
|
+
"""
|
|
1000
|
+
content = self.get_property(judgment_uri, name)
|
|
1001
|
+
|
|
1002
|
+
if content:
|
|
1003
|
+
return isoparse(content)
|
|
1004
|
+
|
|
1005
|
+
return None
|
|
1006
|
+
|
|
947
1007
|
def set_published(
|
|
948
1008
|
self,
|
|
949
1009
|
judgment_uri: DocumentURIString,
|
|
@@ -1052,10 +1112,6 @@ class MarklogicApiClient:
|
|
|
1052
1112
|
if show_unpublished and not self.user_can_view_unpublished_judgments(
|
|
1053
1113
|
self.username,
|
|
1054
1114
|
):
|
|
1055
|
-
# The user cannot view unpublished judgments but is requesting to see them
|
|
1056
|
-
logging.warning(
|
|
1057
|
-
f"User {self.username} is attempting to view unpublished judgments but does not have that privilege.",
|
|
1058
|
-
)
|
|
1059
1115
|
return False
|
|
1060
1116
|
return show_unpublished
|
|
1061
1117
|
|
|
@@ -1068,14 +1124,14 @@ class MarklogicApiClient:
|
|
|
1068
1124
|
response = self._send_to_eval(vars, "get_properties_for_search_results.xqy")
|
|
1069
1125
|
return get_single_string_from_marklogic_response(response)
|
|
1070
1126
|
|
|
1071
|
-
def search_and_decode_response(self, search_parameters: SearchParameters) ->
|
|
1127
|
+
def search_and_decode_response(self, search_parameters: SearchParameters) -> bytes:
|
|
1072
1128
|
response = self.advanced_search(search_parameters)
|
|
1073
|
-
return
|
|
1129
|
+
return get_single_bytestring_from_marklogic_response(response)
|
|
1074
1130
|
|
|
1075
1131
|
def search_judgments_and_decode_response(
|
|
1076
1132
|
self,
|
|
1077
1133
|
search_parameters: SearchParameters,
|
|
1078
|
-
) ->
|
|
1134
|
+
) -> bytes:
|
|
1079
1135
|
search_parameters.collections = [DOCUMENT_COLLECTION_URI_JUDGMENT]
|
|
1080
1136
|
return self.search_and_decode_response(search_parameters)
|
|
1081
1137
|
|
|
@@ -1203,11 +1259,31 @@ class MarklogicApiClient:
|
|
|
1203
1259
|
|
|
1204
1260
|
return results
|
|
1205
1261
|
|
|
1206
|
-
def
|
|
1262
|
+
def get_missing_fclid(
    self,
    maximum_records: int = 50,
) -> list[str]:
    """Retrieve the URIs of published documents which do not have an identifier in the `fclid` schema.

    :param maximum_records: Passed to `get_missing_fclid.xqy` as `maximum_records`; presumably an
        upper bound on the number of URIs returned (the limit is applied by the query, not here)

    :return: The strings decoded from the multipart MarkLogic response
    """
    query_vars: query_dicts.GetMissingFclidDict = {"maximum_records": maximum_records}
    response = self._send_to_eval(query_vars, "get_missing_fclid.xqy")

    document_uris: list[str] = get_multipart_strings_from_marklogic_response(response)
    return document_uris
|
|
1279
|
+
|
|
1280
|
+
def resolve_from_identifier_slug(
|
|
1281
|
+
self, identifier_slug: DocumentIdentifierSlug, published_only: bool = True
|
|
1282
|
+
) -> IdentifierResolutions:
|
|
1207
1283
|
"""Given a PUI/EUI url, look up the precomputed slug and return the
|
|
1208
1284
|
MarkLogic document URIs which match that slug. Multiple returns should be anticipated"""
|
|
1209
1285
|
vars: query_dicts.ResolveFromIdentifierSlugDict = {
|
|
1210
|
-
"
|
|
1286
|
+
"identifier_slug": identifier_slug,
|
|
1211
1287
|
"published_only": int(published_only),
|
|
1212
1288
|
}
|
|
1213
1289
|
raw_results: list[str] = get_multipart_strings_from_marklogic_response(
|
|
@@ -1218,14 +1294,8 @@ class MarklogicApiClient:
|
|
|
1218
1294
|
)
|
|
1219
1295
|
return IdentifierResolutions.from_marklogic_output(raw_results)
|
|
1220
1296
|
|
|
1221
|
-
def resolve_from_identifier(self, identifier_uri: str, published_only: bool = True) -> IdentifierResolutions:
|
|
1222
|
-
warnings.warn(
|
|
1223
|
-
"resolve_from_identifier deprecated, use resolve_from_identifier_slug instead", DeprecationWarning
|
|
1224
|
-
)
|
|
1225
|
-
return self.resolve_from_identifier(identifier_uri, published_only)
|
|
1226
|
-
|
|
1227
1297
|
def resolve_from_identifier_value(
|
|
1228
|
-
self, identifier_value:
|
|
1298
|
+
self, identifier_value: DocumentIdentifierValue, published_only: bool = True
|
|
1229
1299
|
) -> IdentifierResolutions:
|
|
1230
1300
|
"""Given a PUI/EUI url, look up the precomputed slug and return the
|
|
1231
1301
|
MarkLogic document URIs which match that slug. Multiple returns should be anticipated"""
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
if os.getenv("PDOC_DYNAMIC_VERSION") != "1":
    # Default case: the documentation below shows the bare package name with no version pin.
    __pip_version_string__ = ""
    __poetry_version_string__ = ""

else:
    # Opt-in case (environment variable set to "1"): read the version out of pyproject.toml,
    # three directory levels above this file, so the install snippets can show a version constraint.
    import tomllib
    from pathlib import Path

    pyproject_path = Path(__file__).parent.parent.parent / "pyproject.toml"
    with pyproject_path.open("rb") as f:
        __version__ = tomllib.load(f)["tool"]["poetry"]["version"]

    # Suffixes appended to the package name in the requirements.txt / pyproject.toml examples.
    __pip_version_string__ = f"~={__version__}"
    __poetry_version_string__ = f' = "^{__version__}"'
|
|
17
|
+
|
|
18
|
+
__doc__ = f"""
|
|
19
|
+
|
|
20
|
+
# Installation
|
|
21
|
+
|
|
22
|
+
Include the API client in your project using Pip or Poetry:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install ds-caselaw-marklogic-api-client
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
poetry add ds-caselaw-marklogic-api-client
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
or in your project's `requirements.txt` with:
|
|
33
|
+
|
|
34
|
+
```text
|
|
35
|
+
ds-caselaw-marklogic-api-client{__pip_version_string__}
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
or `pyproject.toml` for Poetry with:
|
|
39
|
+
|
|
40
|
+
```text
|
|
41
|
+
ds-caselaw-marklogic-api-client{__poetry_version_string__}
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
# Usage
|
|
45
|
+
|
|
46
|
+
## Initialising the client
|
|
47
|
+
|
|
48
|
+
Import the `MarklogicApiClient` class and instantiate with appropriate credentials:
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
|
|
52
|
+
from caselawclient.Client import MarklogicApiClient
|
|
53
|
+
|
|
54
|
+
client = MarklogicApiClient(
|
|
55
|
+
host="hostname",
|
|
56
|
+
username="username",
|
|
57
|
+
password="password",
|
|
58
|
+
use_https=True,
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
"""
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from lxml import etree
|
|
2
|
+
|
|
3
|
+
from caselawclient.xml_helpers import DEFAULT_NAMESPACES
|
|
4
|
+
|
|
5
|
+
from ..models.documents import Document
|
|
6
|
+
from ..models.judgments import Judgment
|
|
7
|
+
from ..models.parser_logs import ParserLog
|
|
8
|
+
from ..models.press_summaries import PressSummary
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class CannotDetermineDocumentType(Exception):
    """Raised when a document's XML does not match any of the structures used to pick a document class."""
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_document_type_class(xml: bytes) -> type[Document]:
    """Work out which document class matches the top-level structure of an XML document.

    :param xml: The serialised XML to inspect; it is parsed with `etree.fromstring`

    :return: `Judgment`, `PressSummary` or `ParserLog`, depending on which structure matched first

    :raises CannotDetermineDocumentType: If the XML matches none of the known structures
    """
    root = etree.fromstring(xml)

    # XPath expression -> class to return. Checked in this order; the first expression that
    # selects anything wins.
    known_structures: tuple[tuple[str, type[Document]], ...] = (
        # `<akomaNtoso>` wrapping a `<judgment>` element: a judgment
        ("/akn:akomaNtoso/akn:judgment", Judgment),
        # `<akomaNtoso>` wrapping `<doc name='pressSummary'>`: a press summary
        ("/akn:akomaNtoso/akn:doc[@name='pressSummary']", PressSummary),
        # A root element of `<error>`: parser error output, represented by `ParserLog`
        ("/error", ParserLog),
    )

    for expression, document_class in known_structures:
        if root.xpath(expression, namespaces=DEFAULT_NAMESPACES):
            return document_class

    # Nothing matched, so we can't say for sure what this is. Fail out.
    raise CannotDetermineDocumentType(
        "Unable to determine the Document type by its XML",
    )
|