ds-caselaw-marklogic-api-client 27.1.0__tar.gz → 43.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/PKG-INFO +16 -12
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/README.md +3 -1
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/pyproject.toml +24 -17
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/Client.py +219 -32
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/__init__.py +63 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/client_helpers/__init__.py +35 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/errors.py +1 -3
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/factories.py +200 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/identifier_resolution.py +52 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/managers/merge/__init__.py +51 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/managers/merge/checks.py +79 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/models/documents/__init__.py +228 -75
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/models/documents/body.py +96 -26
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/models/documents/comparison.py +42 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/models/documents/exceptions.py +4 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/models/documents/transforms/html.xsl +10 -2
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/models/documents/xml.py +72 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/models/identifiers/__init__.py +216 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/models/identifiers/collection.py +172 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/models/identifiers/exceptions.py +6 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/models/identifiers/fclid.py +55 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/models/identifiers/neutral_citation.py +77 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/models/identifiers/press_summary_ncn.py +24 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/models/identifiers/unpacker.py +61 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/models/judgments.py +60 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/models/neutral_citation_mixin.py +10 -9
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/models/parser_logs.py +14 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/models/press_summaries.py +59 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/models/utilities/__init__.py +6 -7
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/models/utilities/aws.py +69 -30
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/models/utilities/move.py +1 -1
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/py.typed +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/responses/search_result.py +42 -7
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/search_parameters.py +6 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/types.py +108 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/xml_helpers.py +41 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/xquery/check_content_hash_unique_by_uri.xqy +15 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/get_components_for_document.xqy +1 -1
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/xquery/get_judgment.xqy +69 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/xquery/get_missing_fclid.xqy +25 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/xquery/get_next_document_sequence_number.xqy +14 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy +20 -9
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/get_pending_parse_for_version.xqy +7 -4
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/xquery/get_property_as_node.xqy +9 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/xquery/get_recently_enriched.xqy +18 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/xquery/get_recently_parsed.xqy +19 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/insert_document.xqy +2 -6
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/xquery/resolve_from_identifier_slug.xqy +16 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/xquery/resolve_from_identifier_value.xqy +17 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/xquery/set_datetime_property.xqy +11 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/xquery/set_property_as_node.xqy +11 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/update_locked_judgment.xqy +1 -2
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/xslt_transform.xqy +1 -1
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery_type_dicts.py +48 -2
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/xslt/modify_xml_live.xsl +70 -0
- ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/xslt/sample.xsl +26 -0
- ds_caselaw_marklogic_api_client-27.1.0/src/caselawclient/__init__.py +0 -40
- ds_caselaw_marklogic_api_client-27.1.0/src/caselawclient/factories.py +0 -133
- ds_caselaw_marklogic_api_client-27.1.0/src/caselawclient/models/documents/xml.py +0 -43
- ds_caselaw_marklogic_api_client-27.1.0/src/caselawclient/models/judgments.py +0 -53
- ds_caselaw_marklogic_api_client-27.1.0/src/caselawclient/models/press_summaries.py +0 -54
- ds_caselaw_marklogic_api_client-27.1.0/src/caselawclient/xml_helpers.py +0 -22
- ds_caselaw_marklogic_api_client-27.1.0/src/caselawclient/xquery/get_judgment.xqy +0 -21
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/LICENSE.md +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/client_helpers/search_helpers.py +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/content_hash.py +0 -0
- /ds_caselaw_marklogic_api_client-27.1.0/src/caselawclient/py.typed → /ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/managers/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/models/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/models/documents/statuses.py +0 -0
- /ds_caselaw_marklogic_api_client-27.1.0/src/caselawclient/client_helpers/__init__.py → /ds_caselaw_marklogic_api_client-43.1.0/src/caselawclient/models/documents/versions.py +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/models/utilities/dates.py +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/responses/__init__.py +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/responses/search_response.py +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/responses/xsl/search_match.xsl +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/break_judgment_checkout.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/checkin_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/checkout_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/copy_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/delete_judgment.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/document_collections.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/document_exists.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/get_combined_stats_table.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/get_highest_enrichment_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/get_highest_parser_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/get_judgment_checkout_status.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/get_judgment_version.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/get_last_modified.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/get_properties_for_search_results.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/get_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/get_version_annotation.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/get_version_created.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/list_judgment_versions.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/set_boolean_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/set_metadata_citation.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/set_metadata_court.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/set_metadata_jurisdiction.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/set_metadata_name.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/set_metadata_this_uri.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/set_metadata_work_expression_date.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/set_property.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/update_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/user_has_privilege.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/user_has_role.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/validate_all_documents.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/validate_document.xqy +0 -0
- {ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/src/caselawclient/xquery/xslt.xqy +0 -0
|
@@ -1,31 +1,33 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: ds-caselaw-marklogic-api-client
|
|
3
|
-
Version: 27.1.0
|
|
3
|
+
Version: 43.1.0
|
|
4
4
|
Summary: An API client for interacting with the underlying data in Find Caselaw.
|
|
5
|
-
Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
|
|
6
5
|
Keywords: national archives,caselaw
|
|
7
6
|
Author: The National Archives
|
|
8
|
-
Requires-Python: >=3.
|
|
7
|
+
Requires-Python: >=3.12.0,<4.0.0
|
|
9
8
|
Classifier: Programming Language :: Python :: 3
|
|
10
|
-
Classifier: Programming Language :: Python :: 3.
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
11
|
Requires-Dist: boto3 (>=1.26.112,<2.0.0)
|
|
14
|
-
Requires-Dist: certifi (>=
|
|
12
|
+
Requires-Dist: certifi (>=2025.10.5,<2025.11.0)
|
|
15
13
|
Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
|
|
16
|
-
Requires-Dist:
|
|
14
|
+
Requires-Dist: defusedxml (>=0.7.1,<0.8.0)
|
|
15
|
+
Requires-Dist: django-environ (>=0.12.0)
|
|
17
16
|
Requires-Dist: ds-caselaw-utils (>=2.0.0,<3.0.0)
|
|
18
17
|
Requires-Dist: idna (>=3.4,<4.0)
|
|
19
|
-
Requires-Dist: lxml (>=
|
|
18
|
+
Requires-Dist: lxml (>=6.0.0,<7.0.0)
|
|
20
19
|
Requires-Dist: memoization (>=0.4.0,<0.5.0)
|
|
21
20
|
Requires-Dist: mypy-boto3-s3 (>=1.26.104,<2.0.0)
|
|
22
21
|
Requires-Dist: mypy-boto3-sns (>=1.26.69,<2.0.0)
|
|
22
|
+
Requires-Dist: pydantic (>=2.12.3,<3.0.0)
|
|
23
23
|
Requires-Dist: python-dateutil (>=2.9.0-post.0,<3.0.0)
|
|
24
|
-
Requires-Dist: pytz (
|
|
24
|
+
Requires-Dist: pytz (>2024)
|
|
25
25
|
Requires-Dist: requests (>=2.28.2,<3.0.0)
|
|
26
26
|
Requires-Dist: requests-toolbelt (>=0.10.1,<1.1.0)
|
|
27
27
|
Requires-Dist: saxonche (>=12.5.0,<13.0.0)
|
|
28
|
+
Requires-Dist: sqids (>=0.5.0,<0.6.0)
|
|
28
29
|
Requires-Dist: typing-extensions (>=4.7.1,<5.0.0)
|
|
30
|
+
Project-URL: Homepage, https://github.com/nationalarchives/ds-caselaw-custom-api-client
|
|
29
31
|
Description-Content-Type: text/markdown
|
|
30
32
|
|
|
31
33
|
# The National Archives: Find Case Law
|
|
@@ -34,7 +36,9 @@ This repository is part of the [Find Case Law](https://caselaw.nationalarchives.
|
|
|
34
36
|
|
|
35
37
|
# MarkLogic API Client
|
|
36
38
|
|
|
37
|
-
[](https://pypi.org/project/ds-caselaw-marklogic-api-client/)
|
|
39
|
+
[](https://pypi.org/project/ds-caselaw-marklogic-api-client/)
|
|
40
|
+

|
|
41
|
+

|
|
38
42
|
|
|
39
43
|
This is an API Client for connecting to Marklogic for The National Archive's Caselaw site.
|
|
40
44
|
|
|
@@ -4,7 +4,9 @@ This repository is part of the [Find Case Law](https://caselaw.nationalarchives.
|
|
|
4
4
|
|
|
5
5
|
# MarkLogic API Client
|
|
6
6
|
|
|
7
|
-
[](https://pypi.org/project/ds-caselaw-marklogic-api-client/)
|
|
7
|
+
[](https://pypi.org/project/ds-caselaw-marklogic-api-client/)
|
|
8
|
+

|
|
9
|
+

|
|
8
10
|
|
|
9
11
|
This is an API Client for connecting to Marklogic for The National Archive's Caselaw site.
|
|
10
12
|
|
{ds_caselaw_marklogic_api_client-27.1.0 → ds_caselaw_marklogic_api_client-43.1.0}/pyproject.toml
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "ds-caselaw-marklogic-api-client"
|
|
3
|
-
version = "27.1.0"
|
|
3
|
+
version = "43.1.0"
|
|
4
4
|
description = "An API client for interacting with the underlying data in Find Caselaw."
|
|
5
5
|
authors = ["The National Archives"]
|
|
6
6
|
homepage = "https://github.com/nationalarchives/ds-caselaw-custom-api-client"
|
|
@@ -11,38 +11,42 @@ packages = [
|
|
|
11
11
|
]
|
|
12
12
|
|
|
13
13
|
[tool.poetry.dependencies]
|
|
14
|
-
python = "^3.
|
|
15
|
-
certifi = ">=
|
|
14
|
+
python = "^3.12.0"
|
|
15
|
+
certifi = ">=2025.10.5,<2025.11.0"
|
|
16
16
|
charset-normalizer = "^3.0.0"
|
|
17
|
-
django-environ = "
|
|
17
|
+
django-environ = ">=0.12.0"
|
|
18
18
|
idna = "^3.4"
|
|
19
19
|
requests = "^2.28.2"
|
|
20
20
|
requests-toolbelt = ">=0.10.1,<1.1.0"
|
|
21
21
|
memoization = "^0.4.0"
|
|
22
|
-
lxml = "^
|
|
22
|
+
lxml = "^6.0.0"
|
|
23
23
|
ds-caselaw-utils = "^2.0.0"
|
|
24
24
|
boto3 = "^1.26.112"
|
|
25
25
|
typing-extensions = "^4.7.1"
|
|
26
26
|
mypy-boto3-s3 = "^1.26.104"
|
|
27
27
|
mypy-boto3-sns = "^1.26.69"
|
|
28
|
-
pytz = "
|
|
28
|
+
pytz = ">2024"
|
|
29
29
|
python-dateutil = "^2.9.0-post.0"
|
|
30
30
|
saxonche = "^12.5.0"
|
|
31
|
+
sqids = "^0.5.0"
|
|
32
|
+
defusedxml = "^0.7.1"
|
|
33
|
+
pydantic = "^2.12.3"
|
|
31
34
|
|
|
32
35
|
[tool.poetry.group.dev.dependencies]
|
|
33
|
-
coverage = "
|
|
34
|
-
pytest = "
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
36
|
+
coverage = "7.11.0"
|
|
37
|
+
pytest = "8.4.2"
|
|
38
|
+
pytest-cov = "7.0.0"
|
|
39
|
+
beautifulsoup4 = "4.14.2"
|
|
40
|
+
responses = "0.25.8"
|
|
41
|
+
python-dotenv = "1.2.1"
|
|
42
|
+
time-machine = "2.19.0"
|
|
43
|
+
moto = {version = "5.1.16", extras = ["all"]}
|
|
40
44
|
|
|
41
45
|
[tool.poetry.group.docs]
|
|
42
46
|
optional = true
|
|
43
47
|
|
|
44
48
|
[tool.poetry.group.docs.dependencies]
|
|
45
|
-
pdoc = "^
|
|
49
|
+
pdoc = "^16.0.0"
|
|
46
50
|
|
|
47
51
|
|
|
48
52
|
[tool.commitizen]
|
|
@@ -51,6 +55,8 @@ tag_format = "v$version"
|
|
|
51
55
|
version_scheme = "semver2"
|
|
52
56
|
version_provider = "poetry"
|
|
53
57
|
update_changelog_on_bump = true
|
|
58
|
+
changelog_incremental = true
|
|
59
|
+
|
|
54
60
|
[build-system]
|
|
55
61
|
requires = ["poetry-core"]
|
|
56
62
|
build-backend = "poetry.core.masonry.api"
|
|
@@ -66,14 +72,15 @@ line-length = 120
|
|
|
66
72
|
|
|
67
73
|
[tool.ruff.lint]
|
|
68
74
|
ignore = ["E501", "G004", "PLR2004", "RUF005", "RUF012", "UP040"] # longlines, fstrings in logs, magic values, consider not concat, mutable classbits, type instead of TypeAlias
|
|
69
|
-
extend-select = ["W", "I", "SLF", "SIM"]
|
|
70
|
-
# extend-select = [ "B", "Q", "
|
|
75
|
+
extend-select = ["W", "I", "SLF", "SIM", "C90", "S"]
|
|
76
|
+
# extend-select = [ "B", "Q", "I", "UP", "YTT", "ASYNC", "BLE", "A", "COM", "C4", "DTZ", "T10", "DJ", "EM", "EXE", "FA",
|
|
71
77
|
# "ISC", "ICN", "G", "INP", "PIE", "T20", "PYI", "PT", "Q", "RSE", "RET", "SLOT", "TID", "TCH", "INT", "PTH",
|
|
72
78
|
# "FIX", "PGH", "PL", "TRY", "FLY", "PERF", "RUF"]
|
|
73
79
|
unfixable = ["ERA"]
|
|
74
80
|
|
|
75
81
|
[tool.ruff.lint.extend-per-file-ignores]
|
|
76
|
-
"tests/*" = ["S101"]
|
|
82
|
+
"tests/*" = ["S101"] # `assert` is fine in tests
|
|
83
|
+
"smoketest/*" = ["S101"] # `assert` is fine in tests
|
|
77
84
|
"tests/client/test_client.py" = ["SLF001"] # TODO: This really shouldn't be the case, but it's not important to fix right now.
|
|
78
85
|
|
|
79
86
|
# things skipped:
|
|
@@ -7,29 +7,34 @@ import warnings
|
|
|
7
7
|
from datetime import datetime, time, timedelta
|
|
8
8
|
from pathlib import Path
|
|
9
9
|
from typing import Any, Optional, Type, Union
|
|
10
|
-
from xml.etree import
|
|
11
|
-
from xml.etree.ElementTree import Element, ParseError, fromstring
|
|
10
|
+
from xml.etree.ElementTree import Element
|
|
12
11
|
|
|
13
12
|
import environ
|
|
14
13
|
import requests
|
|
14
|
+
from dateutil.parser import isoparse
|
|
15
|
+
from defusedxml import ElementTree
|
|
16
|
+
from defusedxml.ElementTree import ParseError, fromstring
|
|
15
17
|
from ds_caselaw_utils.types import NeutralCitationString
|
|
18
|
+
from lxml import etree
|
|
16
19
|
from requests.auth import HTTPBasicAuth
|
|
17
20
|
from requests.structures import CaseInsensitiveDict
|
|
18
21
|
from requests_toolbelt.multipart import decoder
|
|
19
22
|
|
|
20
23
|
from caselawclient import xquery_type_dicts as query_dicts
|
|
21
|
-
from caselawclient.
|
|
24
|
+
from caselawclient.identifier_resolution import IdentifierResolutions
|
|
22
25
|
from caselawclient.models.documents import (
|
|
23
26
|
DOCUMENT_COLLECTION_URI_JUDGMENT,
|
|
24
27
|
DOCUMENT_COLLECTION_URI_PRESS_SUMMARY,
|
|
25
28
|
Document,
|
|
26
|
-
DocumentURIString,
|
|
27
29
|
)
|
|
30
|
+
from caselawclient.models.documents.versions import VersionAnnotation
|
|
28
31
|
from caselawclient.models.judgments import Judgment
|
|
29
32
|
from caselawclient.models.press_summaries import PressSummary
|
|
30
33
|
from caselawclient.models.utilities import move
|
|
31
34
|
from caselawclient.search_parameters import SearchParameters
|
|
35
|
+
from caselawclient.types import DocumentIdentifierSlug, DocumentIdentifierValue, DocumentURIString
|
|
32
36
|
from caselawclient.xquery_type_dicts import (
|
|
37
|
+
CheckContentHashUniqueByUriDict,
|
|
33
38
|
MarkLogicDocumentURIString,
|
|
34
39
|
MarkLogicDocumentVersionURIString,
|
|
35
40
|
MarkLogicPrivilegeURIString,
|
|
@@ -53,6 +58,11 @@ from .errors import (
|
|
|
53
58
|
)
|
|
54
59
|
|
|
55
60
|
env = environ.Env()
|
|
61
|
+
|
|
62
|
+
# Requests timeouts: https://requests.readthedocs.io/en/latest/user/advanced/
|
|
63
|
+
CONNECT_TIMEOUT = float(os.environ.get("CONNECT_TIMEOUT", "3.05"))
|
|
64
|
+
READ_TIMEOUT = float(os.environ.get("READ_TIMEOUT", "10.0"))
|
|
65
|
+
|
|
56
66
|
ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
|
|
57
67
|
DEFAULT_XSL_TRANSFORM = "accessible-html.xsl"
|
|
58
68
|
|
|
@@ -62,6 +72,8 @@ except importlib.metadata.PackageNotFoundError:
|
|
|
62
72
|
VERSION = "0"
|
|
63
73
|
DEFAULT_USER_AGENT = f"ds-caselaw-marklogic-api-client/{VERSION}"
|
|
64
74
|
|
|
75
|
+
DEBUG: bool = bool(os.getenv("DEBUG", default=False))
|
|
76
|
+
|
|
65
77
|
|
|
66
78
|
class NoResponse(Exception):
|
|
67
79
|
"""A requests HTTPError has no response. We expect this will never happen."""
|
|
@@ -73,12 +85,6 @@ class MultipartResponseLongerThanExpected(Exception):
|
|
|
73
85
|
"""
|
|
74
86
|
|
|
75
87
|
|
|
76
|
-
class DocumentHasNoTypeCollection(Exception):
|
|
77
|
-
"""
|
|
78
|
-
A MarkLogic document is not part of a collection which identifies its document type.
|
|
79
|
-
"""
|
|
80
|
-
|
|
81
|
-
|
|
82
88
|
def get_multipart_strings_from_marklogic_response(
|
|
83
89
|
response: requests.Response,
|
|
84
90
|
) -> list[str]:
|
|
@@ -171,6 +177,7 @@ class MarklogicApiClient:
|
|
|
171
177
|
error_code_classes: dict[str, Type[MarklogicAPIError]] = {
|
|
172
178
|
"XDMP-DOCNOTFOUND": MarklogicResourceNotFoundError,
|
|
173
179
|
"XDMP-LOCKCONFLICT": MarklogicResourceLockedError,
|
|
180
|
+
"XDMP-LOCKED": MarklogicResourceLockedError,
|
|
174
181
|
"DLS-UNMANAGED": MarklogicResourceUnmanagedError,
|
|
175
182
|
"DLS-NOTCHECKEDOUT": MarklogicResourceNotCheckedOutError,
|
|
176
183
|
"DLS-CHECKOUTCONFLICT": MarklogicCheckoutConflictError,
|
|
@@ -207,20 +214,22 @@ class MarklogicApiClient:
|
|
|
207
214
|
Returns a list of PressSummary objects associated with a given Document URI
|
|
208
215
|
"""
|
|
209
216
|
vars: query_dicts.GetComponentsForDocumentDict = {
|
|
210
|
-
"parent_uri":
|
|
217
|
+
"parent_uri": uri,
|
|
211
218
|
"component": "pressSummary",
|
|
212
219
|
}
|
|
213
220
|
response = self._send_to_eval(vars, "get_components_for_document.xqy")
|
|
214
221
|
uris = get_multipart_strings_from_marklogic_response(response)
|
|
215
|
-
return [
|
|
222
|
+
return [
|
|
223
|
+
PressSummary(DocumentURIString(uri.strip("/").strip(".xml")), self) for uri in uris
|
|
224
|
+
] # TODO: Migrate this strip behaviour into proper manipulation of a MarkLogicURIString
|
|
216
225
|
|
|
217
226
|
def get_document_by_uri(
|
|
218
227
|
self,
|
|
219
228
|
uri: DocumentURIString,
|
|
220
|
-
|
|
229
|
+
search_query: Optional[str] = None,
|
|
221
230
|
) -> Document:
|
|
222
231
|
document_type_class = self.get_document_type_from_uri(uri)
|
|
223
|
-
return document_type_class(uri, self)
|
|
232
|
+
return document_type_class(uri, self, search_query=search_query)
|
|
224
233
|
|
|
225
234
|
def get_document_type_from_uri(self, uri: DocumentURIString) -> Type[Document]:
|
|
226
235
|
vars: query_dicts.DocumentCollectionsDict = {
|
|
@@ -233,9 +242,7 @@ class MarklogicApiClient:
|
|
|
233
242
|
return Judgment
|
|
234
243
|
if DOCUMENT_COLLECTION_URI_PRESS_SUMMARY in collections:
|
|
235
244
|
return PressSummary
|
|
236
|
-
|
|
237
|
-
f"The document at URI {uri} is not part of a valid document type collection.",
|
|
238
|
-
)
|
|
245
|
+
return Document
|
|
239
246
|
|
|
240
247
|
def _get_error_code_class(self, error_code: str) -> Type[MarklogicAPIError]:
|
|
241
248
|
"""
|
|
@@ -260,10 +267,12 @@ class MarklogicApiClient:
|
|
|
260
267
|
return "Unknown error, Marklogic returned a null or empty response"
|
|
261
268
|
try:
|
|
262
269
|
xml = fromstring(content_as_xml)
|
|
263
|
-
return
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
270
|
+
return str(
|
|
271
|
+
xml.find(
|
|
272
|
+
"message-code",
|
|
273
|
+
namespaces={"": "http://marklogic.com/xdmp/error"},
|
|
274
|
+
).text
|
|
275
|
+
)
|
|
267
276
|
except (ParseError, TypeError, AttributeError):
|
|
268
277
|
return "Unknown error, Marklogic returned a null or empty response"
|
|
269
278
|
|
|
@@ -316,11 +325,13 @@ class MarklogicApiClient:
|
|
|
316
325
|
self,
|
|
317
326
|
vars: query_dicts.MarkLogicAPIDict,
|
|
318
327
|
xquery_file_name: str,
|
|
328
|
+
timeout: tuple[float, float] = (CONNECT_TIMEOUT, READ_TIMEOUT),
|
|
319
329
|
) -> requests.Response:
|
|
320
330
|
return self.eval(
|
|
321
331
|
self._xquery_path(xquery_file_name),
|
|
322
332
|
vars=json.dumps(vars),
|
|
323
333
|
accept_header="application/xml",
|
|
334
|
+
timeout=timeout,
|
|
324
335
|
)
|
|
325
336
|
|
|
326
337
|
def _eval_and_decode(
|
|
@@ -403,6 +414,7 @@ class MarklogicApiClient:
|
|
|
403
414
|
judgment_uri: DocumentURIString,
|
|
404
415
|
version_uri: Optional[DocumentURIString] = None,
|
|
405
416
|
show_unpublished: bool = False,
|
|
417
|
+
search_query: Optional[str] = None,
|
|
406
418
|
) -> bytes:
|
|
407
419
|
marklogic_document_uri = self._format_uri_for_marklogic(judgment_uri)
|
|
408
420
|
marklogic_document_version_uri = (
|
|
@@ -418,6 +430,7 @@ class MarklogicApiClient:
|
|
|
418
430
|
"uri": marklogic_document_uri,
|
|
419
431
|
"version_uri": marklogic_document_version_uri,
|
|
420
432
|
"show_unpublished": show_unpublished,
|
|
433
|
+
"search_query": search_query,
|
|
421
434
|
}
|
|
422
435
|
|
|
423
436
|
response = self._eval_as_bytes(vars, "get_judgment.xqy")
|
|
@@ -433,11 +446,13 @@ class MarklogicApiClient:
|
|
|
433
446
|
judgment_uri: DocumentURIString,
|
|
434
447
|
version_uri: Optional[DocumentURIString] = None,
|
|
435
448
|
show_unpublished: bool = False,
|
|
449
|
+
search_query: Optional[str] = None,
|
|
436
450
|
) -> str:
|
|
437
451
|
return self.get_judgment_xml_bytestring(
|
|
438
452
|
judgment_uri,
|
|
439
453
|
version_uri,
|
|
440
454
|
show_unpublished,
|
|
455
|
+
search_query=search_query,
|
|
441
456
|
).decode(encoding="utf-8")
|
|
442
457
|
|
|
443
458
|
def set_document_name(
|
|
@@ -562,6 +577,7 @@ class MarklogicApiClient:
|
|
|
562
577
|
self,
|
|
563
578
|
document_uri: DocumentURIString,
|
|
564
579
|
document_xml: Element,
|
|
580
|
+
document_type: type[Document],
|
|
565
581
|
annotation: VersionAnnotation,
|
|
566
582
|
) -> requests.Response:
|
|
567
583
|
"""
|
|
@@ -569,6 +585,7 @@ class MarklogicApiClient:
|
|
|
569
585
|
|
|
570
586
|
:param document_uri: The URI to insert the document at
|
|
571
587
|
:param document_xml: The XML of the document to insert
|
|
588
|
+
:param document_type: The type class of the document
|
|
572
589
|
:param annotation: Annotations to record alongside this version
|
|
573
590
|
|
|
574
591
|
:return: The response object from MarkLogic
|
|
@@ -582,6 +599,7 @@ class MarklogicApiClient:
|
|
|
582
599
|
|
|
583
600
|
vars: query_dicts.InsertDocumentDict = {
|
|
584
601
|
"uri": uri,
|
|
602
|
+
"type_collection": document_type.type_collection_name,
|
|
585
603
|
"document": xml.decode("utf-8"),
|
|
586
604
|
"annotation": annotation.as_json,
|
|
587
605
|
}
|
|
@@ -677,10 +695,12 @@ class MarklogicApiClient:
|
|
|
677
695
|
if content == "":
|
|
678
696
|
return None
|
|
679
697
|
response_xml = ElementTree.fromstring(content)
|
|
680
|
-
return
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
698
|
+
return str(
|
|
699
|
+
response_xml.find(
|
|
700
|
+
"dls:annotation",
|
|
701
|
+
namespaces={"dls": "http://marklogic.com/xdmp/dls"},
|
|
702
|
+
).text
|
|
703
|
+
)
|
|
684
704
|
|
|
685
705
|
def get_judgment_version(
|
|
686
706
|
self,
|
|
@@ -709,11 +729,20 @@ class MarklogicApiClient:
|
|
|
709
729
|
== 0
|
|
710
730
|
)
|
|
711
731
|
|
|
732
|
+
def has_unique_content_hash(self, judgment_uri: DocumentURIString) -> bool:
|
|
733
|
+
"""
|
|
734
|
+
Returns True if the content hash for this document is unique (not shared with other documents).
|
|
735
|
+
"""
|
|
736
|
+
uri = self._format_uri_for_marklogic(judgment_uri)
|
|
737
|
+
vars: CheckContentHashUniqueByUriDict = {"uri": uri}
|
|
738
|
+
return self._eval_and_decode(vars, "check_content_hash_unique_by_uri.xqy") == "true"
|
|
739
|
+
|
|
712
740
|
def eval(
|
|
713
741
|
self,
|
|
714
742
|
xquery_path: str,
|
|
715
743
|
vars: str,
|
|
716
744
|
accept_header: str = "multipart/mixed",
|
|
745
|
+
timeout: tuple[float, float] = (CONNECT_TIMEOUT, READ_TIMEOUT),
|
|
717
746
|
) -> requests.Response:
|
|
718
747
|
headers = {
|
|
719
748
|
"Content-type": "application/x-www-form-urlencoded",
|
|
@@ -724,11 +753,16 @@ class MarklogicApiClient:
|
|
|
724
753
|
"vars": vars,
|
|
725
754
|
}
|
|
726
755
|
path = "LATEST/eval"
|
|
756
|
+
|
|
757
|
+
if DEBUG:
|
|
758
|
+
print(f"Sending {vars} to {xquery_path}")
|
|
759
|
+
|
|
727
760
|
response = self.session.request(
|
|
728
761
|
"POST",
|
|
729
762
|
url=self._path_to_request_url(path),
|
|
730
763
|
headers=headers,
|
|
731
764
|
data=data,
|
|
765
|
+
timeout=timeout,
|
|
732
766
|
)
|
|
733
767
|
# Raise relevant exception for an erroneous response
|
|
734
768
|
self._raise_for_status(response)
|
|
@@ -768,6 +802,8 @@ class MarklogicApiClient:
|
|
|
768
802
|
:param judge:
|
|
769
803
|
:param party:
|
|
770
804
|
:param neutral_citation:
|
|
805
|
+
:param document_name:
|
|
806
|
+
:param consignment_number:
|
|
771
807
|
:param specific_keyword:
|
|
772
808
|
:param order:
|
|
773
809
|
:param date_from:
|
|
@@ -852,6 +888,17 @@ class MarklogicApiClient:
|
|
|
852
888
|
}
|
|
853
889
|
return self._eval_and_decode(vars, "get_property.xqy")
|
|
854
890
|
|
|
891
|
+
def get_property_as_node(self, judgment_uri: DocumentURIString, name: str) -> Optional[etree._Element]:
|
|
892
|
+
uri = self._format_uri_for_marklogic(judgment_uri)
|
|
893
|
+
vars: query_dicts.GetPropertyAsNodeDict = {
|
|
894
|
+
"uri": uri,
|
|
895
|
+
"name": name,
|
|
896
|
+
}
|
|
897
|
+
value = self._eval_and_decode(vars, "get_property_as_node.xqy")
|
|
898
|
+
if not value:
|
|
899
|
+
return None
|
|
900
|
+
return etree.fromstring(value)
|
|
901
|
+
|
|
855
902
|
def get_version_annotation(self, judgment_uri: DocumentURIString) -> str:
|
|
856
903
|
uri = self._format_uri_for_marklogic(judgment_uri)
|
|
857
904
|
vars: query_dicts.GetVersionAnnotationDict = {
|
|
@@ -884,6 +931,22 @@ class MarklogicApiClient:
|
|
|
884
931
|
|
|
885
932
|
return self._send_to_eval(vars, "set_property.xqy")
|
|
886
933
|
|
|
934
|
+
def set_property_as_node(
    self,
    judgment_uri: DocumentURIString,
    name: str,
    value: etree._Element,
) -> requests.Response:
    """
    Set a MarkLogic property on a document to the _contents_ of an XML root node.

    The node is serialised to a string and passed to `set_property_as_node.xqy`. The root node itself is discarded; only its contents become the property value.

    :param judgment_uri: URI of the document whose property is being set
    :param name: Name of the property to set
    :param value: Root node whose contents are stored

    :return: The response returned by `_send_to_eval`
    """
    marklogic_uri = self._format_uri_for_marklogic(judgment_uri)
    serialised_node = etree.tostring(value).decode()
    query_vars: query_dicts.SetPropertyAsNodeDict = {
        "uri": marklogic_uri,
        "value": serialised_node,
        "name": name,
    }
    return self._send_to_eval(query_vars, "set_property_as_node.xqy")
|
|
949
|
+
|
|
887
950
|
def set_boolean_property(
|
|
888
951
|
self,
|
|
889
952
|
judgment_uri: DocumentURIString,
|
|
@@ -897,12 +960,50 @@ class MarklogicApiClient:
|
|
|
897
960
|
"value": string_value,
|
|
898
961
|
"name": name,
|
|
899
962
|
}
|
|
963
|
+
"""
|
|
964
|
+
Set a property within MarkLogic which is specifically a boolean.
|
|
965
|
+
|
|
966
|
+
Since XML has no concept of boolean, the actual value in the database is set to `"true"` or `"false"`.
|
|
967
|
+
"""
|
|
900
968
|
return self._send_to_eval(vars, "set_boolean_property.xqy")
|
|
901
969
|
|
|
902
970
|
def get_boolean_property(self, judgment_uri: DocumentURIString, name: str) -> bool:
    """
    Read a MarkLogic property and interpret it as a boolean.

    :param judgment_uri: URI of the document whose property is being read
    :param name: Name of the property to read

    :return: `True` only when `get_property` returns exactly the string `"true"`, otherwise `False`
    """
    return self.get_property(judgment_uri, name) == "true"
|
|
905
978
|
|
|
979
|
+
def set_datetime_property(
    self,
    judgment_uri: DocumentURIString,
    name: str,
    value: datetime,
) -> requests.Response:
    """
    Set a MarkLogic property which is specifically a datetime.

    The datetime is sent to `set_datetime_property.xqy` as the string produced by `value.isoformat()`.

    :param judgment_uri: URI of the document whose property is being set
    :param name: Name of the property to set
    :param value: The datetime to store

    :return: The response returned by `_send_to_eval`
    """
    marklogic_uri = self._format_uri_for_marklogic(judgment_uri)
    iso_value = value.isoformat()
    query_vars: query_dicts.SetDatetimePropertyDict = {
        "uri": marklogic_uri,
        "value": iso_value,
        "name": name,
    }
    return self._send_to_eval(query_vars, "set_datetime_property.xqy")
|
|
993
|
+
|
|
994
|
+
def get_datetime_property(self, judgment_uri: DocumentURIString, name: str) -> Optional[datetime]:
    """
    Read a MarkLogic property and interpret it as a datetime.

    :param judgment_uri: URI of the document whose property is being read
    :param name: Name of the property to read

    :return: The property value parsed with `isoparse`, or `None` if `get_property` returns an empty value
    """
    raw_value = self.get_property(judgment_uri, name)

    # Guard clause: nothing stored (or an empty value) means there is no datetime to parse.
    if not raw_value:
        return None

    return isoparse(raw_value)
|
|
1006
|
+
|
|
906
1007
|
def set_published(
|
|
907
1008
|
self,
|
|
908
1009
|
judgment_uri: DocumentURIString,
|
|
@@ -1011,10 +1112,6 @@ class MarklogicApiClient:
|
|
|
1011
1112
|
if show_unpublished and not self.user_can_view_unpublished_judgments(
|
|
1012
1113
|
self.username,
|
|
1013
1114
|
):
|
|
1014
|
-
# The user cannot view unpublished judgments but is requesting to see them
|
|
1015
|
-
logging.warning(
|
|
1016
|
-
f"User {self.username} is attempting to view unpublished judgments but does not have that privilege.",
|
|
1017
|
-
)
|
|
1018
1115
|
return False
|
|
1019
1116
|
return show_unpublished
|
|
1020
1117
|
|
|
@@ -1027,14 +1124,14 @@ class MarklogicApiClient:
|
|
|
1027
1124
|
response = self._send_to_eval(vars, "get_properties_for_search_results.xqy")
|
|
1028
1125
|
return get_single_string_from_marklogic_response(response)
|
|
1029
1126
|
|
|
1030
|
-
def search_and_decode_response(self, search_parameters: SearchParameters) ->
|
|
1127
|
+
def search_and_decode_response(self, search_parameters: SearchParameters) -> bytes:
    """
    Run `advanced_search` and return the single bytestring extracted from its response.

    :param search_parameters: Parameters passed straight through to `advanced_search`

    :return: The result of `get_single_bytestring_from_marklogic_response` for the search response
    """
    return get_single_bytestring_from_marklogic_response(self.advanced_search(search_parameters))
|
|
1033
1130
|
|
|
1034
1131
|
def search_judgments_and_decode_response(self, search_parameters: SearchParameters) -> bytes:
    """
    Search with the collections fixed to the judgment collection and return the decoded response.

    Note that this overwrites `search_parameters.collections` on the object that was passed in.

    :param search_parameters: Search parameters; its `collections` attribute is replaced

    :return: The bytes returned by `search_and_decode_response`
    """
    judgment_only = [DOCUMENT_COLLECTION_URI_JUDGMENT]
    search_parameters.collections = judgment_only
    return self.search_and_decode_response(search_parameters)
|
|
1040
1137
|
|
|
@@ -1076,6 +1173,7 @@ class MarklogicApiClient:
|
|
|
1076
1173
|
self,
|
|
1077
1174
|
target_enrichment_version: tuple[int, int],
|
|
1078
1175
|
target_parser_version: tuple[int, int],
|
|
1176
|
+
maximum_records: int = 1000,
|
|
1079
1177
|
) -> list[list[Any]]:
|
|
1080
1178
|
"""Retrieve documents which are not yet enriched with a given version."""
|
|
1081
1179
|
vars: query_dicts.GetPendingEnrichmentForVersionDict = {
|
|
@@ -1083,6 +1181,7 @@ class MarklogicApiClient:
|
|
|
1083
1181
|
"target_enrichment_minor_version": target_enrichment_version[1],
|
|
1084
1182
|
"target_parser_major_version": target_parser_version[0],
|
|
1085
1183
|
"target_parser_minor_version": target_parser_version[1],
|
|
1184
|
+
"maximum_records": maximum_records,
|
|
1086
1185
|
}
|
|
1087
1186
|
results: list[list[Any]] = json.loads(
|
|
1088
1187
|
get_single_string_from_marklogic_response(
|
|
@@ -1095,6 +1194,21 @@ class MarklogicApiClient:
|
|
|
1095
1194
|
|
|
1096
1195
|
return results
|
|
1097
1196
|
|
|
1197
|
+
def get_recently_enriched(self) -> list[list[Any]]:
    """
    Run `get_recently_enriched.xqy` (with no variables) and return its JSON-decoded output.

    :return: The decoded JSON, typed as a list of rows. The row layout is defined by the XQuery, which is not visible here.
    """
    response = self._send_to_eval({}, "get_recently_enriched.xqy")
    payload = get_single_string_from_marklogic_response(response)
    rows: list[list[Any]] = json.loads(payload)
    return rows
|
|
1211
|
+
|
|
1098
1212
|
def get_highest_parser_version(self) -> tuple[int, int]:
|
|
1099
1213
|
"""This gets the highest parser version in the database, so if nothing has been parsed with the most recent version of the parser, this won't reflect that change."""
|
|
1100
1214
|
table = json.loads(
|
|
@@ -1111,11 +1225,13 @@ class MarklogicApiClient:
|
|
|
1111
1225
|
def get_pending_parse_for_version(
|
|
1112
1226
|
self,
|
|
1113
1227
|
target_version: tuple[int, int],
|
|
1228
|
+
maximum_records: int = 1000,
|
|
1114
1229
|
) -> list[list[Any]]:
|
|
1115
1230
|
"""Retrieve documents which are not yet parsed with a given version."""
|
|
1116
1231
|
vars: query_dicts.GetPendingParseForVersionDict = {
|
|
1117
1232
|
"target_major_version": target_version[0],
|
|
1118
1233
|
"target_minor_version": target_version[1],
|
|
1234
|
+
"maximum_records": maximum_records,
|
|
1119
1235
|
}
|
|
1120
1236
|
results: list[list[Any]] = json.loads(
|
|
1121
1237
|
get_single_string_from_marklogic_response(
|
|
@@ -1127,3 +1243,74 @@ class MarklogicApiClient:
|
|
|
1127
1243
|
)
|
|
1128
1244
|
|
|
1129
1245
|
return results
|
|
1246
|
+
|
|
1247
|
+
def get_recently_parsed(self) -> list[list[Any]]:
    """
    Run `get_recently_parsed.xqy` (with no variables) and return its JSON-decoded output.

    :return: The decoded JSON, typed as a list of rows. The row layout is defined by the XQuery, which is not visible here.
    """
    response = self._send_to_eval({}, "get_recently_parsed.xqy")
    payload = get_single_string_from_marklogic_response(response)
    rows: list[list[Any]] = json.loads(payload)
    return rows
|
|
1261
|
+
|
|
1262
|
+
def get_missing_fclid(
    self,
    maximum_records: int = 50,
) -> list[str]:
    """
    Retrieve the URIs of published documents which do not have an identifier in the `fclid` schema.

    :param maximum_records: Passed to `get_missing_fclid.xqy` as `maximum_records` (presumably an upper bound on the number of URIs returned; confirm against the XQuery)

    :return: The strings extracted from the multipart MarkLogic response
    """
    query_vars: query_dicts.GetMissingFclidDict = {"maximum_records": maximum_records}
    response = self._send_to_eval(query_vars, "get_missing_fclid.xqy")
    uris: list[str] = get_multipart_strings_from_marklogic_response(response)
    return uris
|
|
1279
|
+
|
|
1280
|
+
def resolve_from_identifier_slug(
    self, identifier_slug: DocumentIdentifierSlug, published_only: bool = True
) -> IdentifierResolutions:
    """
    Given a PUI/EUI url, look up the precomputed slug and return the MarkLogic document URIs which match that slug.

    Multiple results should be anticipated.

    :param identifier_slug: The slug to look up
    :param published_only: Sent to `resolve_from_identifier_slug.xqy` as an integer (`1` or `0`)

    :return: An `IdentifierResolutions` built from the strings in the MarkLogic response
    """
    query_vars: query_dicts.ResolveFromIdentifierSlugDict = {
        "identifier_slug": identifier_slug,
        # The flag is converted to an int before being sent.
        "published_only": int(published_only),
    }
    response = self._send_to_eval(query_vars, "resolve_from_identifier_slug.xqy")
    resolutions: list[str] = get_multipart_strings_from_marklogic_response(response)
    return IdentifierResolutions.from_marklogic_output(resolutions)
|
|
1296
|
+
|
|
1297
|
+
def resolve_from_identifier_value(
    self, identifier_value: DocumentIdentifierValue, published_only: bool = True
) -> IdentifierResolutions:
    """
    Resolve an identifier value to matching documents using `resolve_from_identifier_value.xqy`.

    Multiple results should be anticipated.

    :param identifier_value: The identifier value to look up
    :param published_only: Sent to the XQuery as an integer (`1` or `0`)

    :return: An `IdentifierResolutions` built from the strings in the MarkLogic response
    """
    query_vars: query_dicts.ResolveFromIdentifierValueDict = {
        "identifier_value": identifier_value,
        # The flag is converted to an int before being sent.
        "published_only": int(published_only),
    }
    response = self._send_to_eval(query_vars, "resolve_from_identifier_value.xqy")
    resolutions: list[str] = get_multipart_strings_from_marklogic_response(response)
    return IdentifierResolutions.from_marklogic_output(resolutions)
|
|
1313
|
+
|
|
1314
|
+
def get_next_document_sequence_number(self) -> int:
    """
    Increment the MarkLogic sequence number by one and return the value.

    :return: The decoded output of `get_next_document_sequence_number.xqy`, converted to an `int`
    """
    decoded = self._eval_and_decode({}, "get_next_document_sequence_number.xqy")
    return int(decoded)
|