ds-caselaw-marklogic-api-client 38.0.0__tar.gz → 39.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/PKG-INFO +3 -2
  2. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/pyproject.toml +8 -6
  3. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/Client.py +15 -10
  4. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/documents/__init__.py +13 -6
  5. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/documents/xml.py +2 -1
  6. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/identifiers/__init__.py +3 -0
  7. ds_caselaw_marklogic_api_client-39.1.0/src/caselawclient/models/identifiers/collection.py +170 -0
  8. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/identifiers/exceptions.py +0 -4
  9. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/judgments.py +1 -0
  10. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/parser_logs.py +1 -0
  11. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/press_summaries.py +1 -0
  12. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xslt/modify_xml_live.xsl +17 -14
  13. ds_caselaw_marklogic_api_client-38.0.0/src/caselawclient/models/identifiers/collection.py +0 -102
  14. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/LICENSE.md +0 -0
  15. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/README.md +0 -0
  16. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/__init__.py +0 -0
  17. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/client_helpers/__init__.py +0 -0
  18. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/client_helpers/search_helpers.py +0 -0
  19. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/content_hash.py +0 -0
  20. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/errors.py +0 -0
  21. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/factories.py +0 -0
  22. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/identifier_resolution.py +0 -0
  23. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/__init__.py +0 -0
  24. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/documents/body.py +0 -0
  25. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/documents/exceptions.py +0 -0
  26. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/documents/statuses.py +0 -0
  27. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/documents/transforms/html.xsl +0 -0
  28. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/identifiers/fclid.py +0 -0
  29. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/identifiers/neutral_citation.py +0 -0
  30. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/identifiers/press_summary_ncn.py +0 -0
  31. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/identifiers/unpacker.py +0 -0
  32. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/neutral_citation_mixin.py +0 -0
  33. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/utilities/__init__.py +0 -0
  34. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/utilities/aws.py +0 -0
  35. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/utilities/dates.py +0 -0
  36. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/models/utilities/move.py +0 -0
  37. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/py.typed +0 -0
  38. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/responses/__init__.py +0 -0
  39. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/responses/search_response.py +0 -0
  40. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/responses/search_result.py +0 -0
  41. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/responses/xsl/search_match.xsl +0 -0
  42. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/search_parameters.py +0 -0
  43. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/types.py +0 -0
  44. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xml_helpers.py +0 -0
  45. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/break_judgment_checkout.xqy +0 -0
  46. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/checkin_judgment.xqy +0 -0
  47. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/checkout_judgment.xqy +0 -0
  48. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/copy_document.xqy +0 -0
  49. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/delete_judgment.xqy +0 -0
  50. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/document_collections.xqy +0 -0
  51. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/document_exists.xqy +0 -0
  52. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_combined_stats_table.xqy +0 -0
  53. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_components_for_document.xqy +0 -0
  54. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_highest_enrichment_version.xqy +0 -0
  55. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_highest_parser_version.xqy +0 -0
  56. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_judgment.xqy +0 -0
  57. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_judgment_checkout_status.xqy +0 -0
  58. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_judgment_version.xqy +0 -0
  59. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_last_modified.xqy +0 -0
  60. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_missing_fclid.xqy +0 -0
  61. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_next_document_sequence_number.xqy +0 -0
  62. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy +0 -0
  63. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_pending_parse_for_version.xqy +0 -0
  64. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_properties_for_search_results.xqy +0 -0
  65. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_property.xqy +0 -0
  66. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_property_as_node.xqy +0 -0
  67. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_recently_enriched.xqy +0 -0
  68. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_recently_parsed.xqy +0 -0
  69. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_version_annotation.xqy +0 -0
  70. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/get_version_created.xqy +0 -0
  71. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/insert_document.xqy +0 -0
  72. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/list_judgment_versions.xqy +0 -0
  73. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/resolve_from_identifier_slug.xqy +0 -0
  74. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/resolve_from_identifier_value.xqy +0 -0
  75. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/set_boolean_property.xqy +0 -0
  76. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/set_metadata_citation.xqy +0 -0
  77. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/set_metadata_court.xqy +0 -0
  78. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/set_metadata_jurisdiction.xqy +0 -0
  79. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/set_metadata_name.xqy +0 -0
  80. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/set_metadata_this_uri.xqy +0 -0
  81. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/set_metadata_work_expression_date.xqy +0 -0
  82. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/set_property.xqy +0 -0
  83. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/set_property_as_node.xqy +0 -0
  84. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/update_document.xqy +0 -0
  85. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/update_locked_judgment.xqy +0 -0
  86. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/user_has_privilege.xqy +0 -0
  87. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/user_has_role.xqy +0 -0
  88. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/validate_all_documents.xqy +0 -0
  89. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/validate_document.xqy +0 -0
  90. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/xslt.xqy +0 -0
  91. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery/xslt_transform.xqy +0 -0
  92. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xquery_type_dicts.py +0 -0
  93. {ds_caselaw_marklogic_api_client-38.0.0 → ds_caselaw_marklogic_api_client-39.1.0}/src/caselawclient/xslt/sample.xsl +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ds-caselaw-marklogic-api-client
3
- Version: 38.0.0
3
+ Version: 39.1.0
4
4
  Summary: An API client for interacting with the underlying data in Find Caselaw.
5
5
  Keywords: national archives,caselaw
6
6
  Author: The National Archives
@@ -11,8 +11,9 @@ Classifier: Programming Language :: Python :: 3.11
11
11
  Classifier: Programming Language :: Python :: 3.12
12
12
  Classifier: Programming Language :: Python :: 3.13
13
13
  Requires-Dist: boto3 (>=1.26.112,<2.0.0)
14
- Requires-Dist: certifi (>=2025.6.15,<2025.7.0)
14
+ Requires-Dist: certifi (>=2025.7.9,<2025.8.0)
15
15
  Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
16
+ Requires-Dist: defusedxml (>=0.7.1,<0.8.0)
16
17
  Requires-Dist: django-environ (>=0.12.0)
17
18
  Requires-Dist: ds-caselaw-utils (>=2.0.0,<3.0.0)
18
19
  Requires-Dist: idna (>=3.4,<4.0)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ds-caselaw-marklogic-api-client"
3
- version = "38.0.0"
3
+ version = "39.1.0"
4
4
  description = "An API client for interacting with the underlying data in Find Caselaw."
5
5
  authors = ["The National Archives"]
6
6
  homepage = "https://github.com/nationalarchives/ds-caselaw-custom-api-client"
@@ -12,7 +12,7 @@ packages = [
12
12
 
13
13
  [tool.poetry.dependencies]
14
14
  python = "^3.10.0"
15
- certifi = ">=2025.6.15,<2025.7.0"
15
+ certifi = ">=2025.7.9,<2025.8.0"
16
16
  charset-normalizer = "^3.0.0"
17
17
  django-environ = ">=0.12.0"
18
18
  idna = "^3.4"
@@ -29,15 +29,16 @@ pytz = ">2024"
29
29
  python-dateutil = "^2.9.0-post.0"
30
30
  saxonche = "^12.5.0"
31
31
  sqids = "^0.5.0"
32
+ defusedxml = "^0.7.1"
32
33
 
33
34
  [tool.poetry.group.dev.dependencies]
34
- coverage = "7.9.2"
35
+ coverage = "7.10.1"
35
36
  pytest = "8.4.1"
36
37
  beautifulsoup4 = "4.13.4"
37
38
  responses = "0.25.7"
38
39
  python-dotenv = "1.1.1"
39
40
  time-machine = "2.16.0"
40
- moto = {version = "5.1.6", extras = ["all"]}
41
+ moto = {version = "5.1.8", extras = ["all"]}
41
42
 
42
43
  [tool.poetry.group.docs]
43
44
  optional = true
@@ -69,14 +70,15 @@ line-length = 120
69
70
 
70
71
  [tool.ruff.lint]
71
72
  ignore = ["E501", "G004", "PLR2004", "RUF005", "RUF012", "UP040"] # longlines, fstrings in logs, magic values, consider not concat, mutable classbits, type instead of TypeAlias
72
- extend-select = ["W", "I", "SLF", "SIM"]
73
- # extend-select = [ "B", "Q", "C90", "I", "UP", "YTT", "ASYNC", "S", "BLE", "A", "COM", "C4", "DTZ", "T10", "DJ", "EM", "EXE", "FA",
73
+ extend-select = ["W", "I", "SLF", "SIM", "C90", "S"]
74
+ # extend-select = [ "B", "Q", "I", "UP", "YTT", "ASYNC", "BLE", "A", "COM", "C4", "DTZ", "T10", "DJ", "EM", "EXE", "FA",
74
75
  # "ISC", "ICN", "G", "INP", "PIE", "T20", "PYI", "PT", "Q", "RSE", "RET", "SLOT", "TID", "TCH", "INT", "PTH",
75
76
  # "FIX", "PGH", "PL", "TRY", "FLY", "PERF", "RUF"]
76
77
  unfixable = ["ERA"]
77
78
 
78
79
  [tool.ruff.lint.extend-per-file-ignores]
79
80
  "tests/*" = ["S101"] # `assert` is fine in tests
81
+ "smoketest/*" = ["S101"] # `assert` is fine in tests
80
82
  "tests/client/test_client.py" = ["SLF001"] # TODO: This really shouldn't be the case, but it's not important to fix right now.
81
83
 
82
84
  # things skipped:
@@ -7,11 +7,12 @@ import warnings
7
7
  from datetime import datetime, time, timedelta
8
8
  from pathlib import Path
9
9
  from typing import Any, Optional, Type, Union
10
- from xml.etree import ElementTree
11
- from xml.etree.ElementTree import Element, ParseError, fromstring
10
+ from xml.etree.ElementTree import Element
12
11
 
13
12
  import environ
14
13
  import requests
14
+ from defusedxml import ElementTree
15
+ from defusedxml.ElementTree import ParseError, fromstring
15
16
  from ds_caselaw_utils.types import NeutralCitationString
16
17
  from lxml import etree
17
18
  from requests.auth import HTTPBasicAuth
@@ -258,10 +259,12 @@ class MarklogicApiClient:
258
259
  return "Unknown error, Marklogic returned a null or empty response"
259
260
  try:
260
261
  xml = fromstring(content_as_xml)
261
- return xml.find(
262
- "message-code",
263
- namespaces={"": "http://marklogic.com/xdmp/error"},
264
- ).text # type: ignore
262
+ return str(
263
+ xml.find(
264
+ "message-code",
265
+ namespaces={"": "http://marklogic.com/xdmp/error"},
266
+ ).text
267
+ )
265
268
  except (ParseError, TypeError, AttributeError):
266
269
  return "Unknown error, Marklogic returned a null or empty response"
267
270
 
@@ -682,10 +685,12 @@ class MarklogicApiClient:
682
685
  if content == "":
683
686
  return None
684
687
  response_xml = ElementTree.fromstring(content)
685
- return response_xml.find(
686
- "dls:annotation",
687
- namespaces={"dls": "http://marklogic.com/xdmp/dls"},
688
- ).text # type: ignore
688
+ return str(
689
+ response_xml.find(
690
+ "dls:annotation",
691
+ namespaces={"dls": "http://marklogic.com/xdmp/dls"},
692
+ ).text
693
+ )
689
694
 
690
695
  def get_judgment_version(
691
696
  self,
@@ -31,7 +31,7 @@ from caselawclient.models.utilities.aws import (
31
31
  request_parse,
32
32
  unpublish_documents,
33
33
  )
34
- from caselawclient.types import DocumentURIString
34
+ from caselawclient.types import DocumentURIString, SuccessFailureMessageTuple
35
35
 
36
36
  from .body import DocumentBody
37
37
  from .exceptions import CannotEnrichUnenrichableDocument, CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion
@@ -63,6 +63,9 @@ class Document:
63
63
  document_noun_plural = "documents"
64
64
  """ The noun for a plural of this document type. """
65
65
 
66
+ _default_reparse_document_type: Optional[str] = None
67
+ """ The default noun to pass to the parser when reparsing given the document type if known. This is used to determine how the document should be parsed and processed."""
68
+
66
69
  type_collection_name: str
67
70
 
68
71
  attributes_to_validate: list[tuple[str, bool, str]] = [
@@ -465,7 +468,6 @@ class Document:
465
468
  now = datetime.datetime.now(datetime.timezone.utc)
466
469
  self.api_client.set_property(self.uri, "last_sent_to_parser", now.isoformat())
467
470
 
468
- parser_type_noun = {"judgment": "judgment", "press summary": "pressSummary"}[self.document_noun]
469
471
  checked_date: Optional[str] = (
470
472
  self.body.document_date_as_date.isoformat()
471
473
  if self.body.document_date_as_date and self.body.document_date_as_date > datetime.date(1001, 1, 1)
@@ -477,16 +479,18 @@ class Document:
477
479
  # values are "" from the API, we should pass None instead in this case.
478
480
 
479
481
  parser_instructions: ParserInstructionsDict = {
480
- "documentType": parser_type_noun,
481
482
  "metadata": {
482
483
  "name": self.body.name or None,
483
484
  "cite": None,
484
485
  "court": self.body.court or None,
485
486
  "date": checked_date,
486
487
  "uri": self.uri,
487
- },
488
+ }
488
489
  }
489
490
 
491
+ if self._default_reparse_document_type:
492
+ parser_instructions["documentType"] = self._default_reparse_document_type
493
+
490
494
  ## TODO: Remove this hack around the fact that NCNs are assumed to be present for all documents' metadata, but actually different document classes may have different metadata
491
495
  if hasattr(self, "neutral_citation"):
492
496
  parser_instructions["metadata"]["cite"] = self.neutral_citation
@@ -521,9 +525,12 @@ class Document:
521
525
  """
522
526
  return self.body.has_content
523
527
 
528
+ def validate_identifiers(self) -> SuccessFailureMessageTuple:
529
+ return self.identifiers.perform_all_validations(document_type=type(self), api_client=self.api_client)
530
+
524
531
  def save_identifiers(self) -> None:
525
532
  """Validate the identifiers, and if the validation passes save them to MarkLogic"""
526
- validations = self.identifiers.perform_all_validations(document_type=type(self), api_client=self.api_client)
533
+ validations = self.validate_identifiers()
527
534
  if validations.success is True:
528
535
  self.api_client.set_property_as_node(self.uri, "identifiers", self.identifiers.as_etree)
529
536
  else:
@@ -570,7 +577,7 @@ class Document:
570
577
  def xml_with_correct_frbr(self) -> bytes:
571
578
  """Dynamically modify FRBR uris to reflect current storage location and FCL id"""
572
579
  fcl_identifiers = self.identifiers.of_type(FindCaseLawIdentifier)
573
- work_uri = f"https://caselaw.nationalarchives.gov.uk/id/{fcl_identifiers[0].url_slug}"
580
+ work_uri = f"https://caselaw.nationalarchives.gov.uk/id/doc/{fcl_identifiers[0].value}"
574
581
  expression_uri = f"https://caselaw.nationalarchives.gov.uk/{self.uri.lstrip('/')}"
575
582
  manifestation_uri = f"https://caselaw.nationalarchives.gov.uk/{self.uri.lstrip('/')}/data.xml"
576
583
  return self.body.apply_xslt(
@@ -58,7 +58,8 @@ class XML:
58
58
  """XSLT transform this XML, given a stylesheet"""
59
59
  passable_values = {k: etree.XSLT.strparam(v) for k, v in values.items()}
60
60
  xslt_transform = etree.XSLT(etree.fromstring(xslt))
61
- return etree.tostring(xslt_transform(self.xml_as_tree, profile_run=False, **passable_values))
61
+ noncanonical_xml = xslt_transform(self.xml_as_tree, profile_run=False, **passable_values)
62
+ return etree.tostring(noncanonical_xml, method="c14n2")
62
63
 
63
64
  def apply_xslt(self, xslt_filename: str, **values: str) -> bytes:
64
65
  """XSLT transform this XML, given a path to a stylesheet"""
@@ -149,6 +149,9 @@ class Identifier(ABC):
149
149
  @property
150
150
  def score(self) -> float:
151
151
  """Return the score of this identifier, used to calculate the preferred identifier for a document."""
152
+ if self.deprecated:
153
+ return 0
154
+
152
155
  return 1 * self.schema.base_score_multiplier
153
156
 
154
157
  def same_as(self, other: "Identifier") -> bool:
@@ -0,0 +1,170 @@
1
+ from typing import TYPE_CHECKING, Optional, Union
2
+
3
+ from lxml import etree
4
+
5
+ from caselawclient.types import SuccessFailureMessageTuple
6
+
7
+ from . import Identifier, IdentifierSchema
8
+ from .fclid import FindCaseLawIdentifier
9
+ from .neutral_citation import NeutralCitationNumber
10
+ from .press_summary_ncn import PressSummaryRelatedNCNIdentifier
11
+
12
+ if TYPE_CHECKING:
13
+ from caselawclient.Client import MarklogicApiClient
14
+ from caselawclient.models.documents import Document
15
+
16
+ SUPPORTED_IDENTIFIER_TYPES: list[type["Identifier"]] = [
17
+ FindCaseLawIdentifier,
18
+ NeutralCitationNumber,
19
+ PressSummaryRelatedNCNIdentifier,
20
+ ]
21
+
22
+
23
+ class IdentifiersCollection(dict[str, Identifier]):
24
+ def validate_uuids_match_keys(self) -> SuccessFailureMessageTuple:
25
+ for uuid, identifier in self.items():
26
+ if uuid != identifier.uuid:
27
+ return SuccessFailureMessageTuple(
28
+ False, [f"Key of {identifier} in Identifiers is {uuid} not {identifier.uuid}"]
29
+ )
30
+
31
+ return SuccessFailureMessageTuple(True, [])
32
+
33
+ def _list_all_identifiers_by_schema(self) -> dict[type[IdentifierSchema], list[Identifier]]:
34
+ """Get a list of all identifiers, grouped by their schema."""
35
+ identifiers_by_schema: dict[type[IdentifierSchema], list[Identifier]] = {}
36
+
37
+ for identifier in self.values():
38
+ identifiers_by_schema.setdefault(identifier.schema, []).append(identifier)
39
+
40
+ return identifiers_by_schema
41
+
42
+ def check_only_single_non_deprecated_identifier_where_multiples_not_allowed(self) -> SuccessFailureMessageTuple:
43
+ """Check that only one non-deprecated identifier exists per schema where that schema does not allow multiples."""
44
+
45
+ for schema, identifiers in self._list_all_identifiers_by_schema().items():
46
+ non_deprecated_identifiers = [i for i in identifiers if not i.deprecated]
47
+ if len(non_deprecated_identifiers) > 1:
48
+ return SuccessFailureMessageTuple(
49
+ False,
50
+ [
51
+ f"Multiple non-deprecated identifiers found for schema '{schema.name}': {', '.join(i.value for i in non_deprecated_identifiers)}"
52
+ ],
53
+ )
54
+
55
+ return SuccessFailureMessageTuple(True, [])
56
+
57
+ def _perform_collection_level_validations(self) -> SuccessFailureMessageTuple:
58
+ """Perform identifier validations which are only possible at the collection level, such as UUID integrity and identifying exclusivity problems."""
59
+
60
+ success = True
61
+ messages: list[str] = []
62
+
63
+ collection_validations_to_run: list[SuccessFailureMessageTuple] = [
64
+ self.validate_uuids_match_keys(),
65
+ self.check_only_single_non_deprecated_identifier_where_multiples_not_allowed(),
66
+ ]
67
+
68
+ for validation in collection_validations_to_run:
69
+ if not validation.success:
70
+ success = False
71
+ messages += validation.messages
72
+
73
+ return SuccessFailureMessageTuple(success, messages)
74
+
75
+ def _perform_identifier_level_validations(
76
+ self, document_type: type["Document"], api_client: "MarklogicApiClient"
77
+ ) -> SuccessFailureMessageTuple:
78
+ """Perform identifier validations at the individual identifier level."""
79
+
80
+ success = True
81
+ messages: list[str] = []
82
+
83
+ for _, identifier in self.items():
84
+ validations = identifier.perform_all_validations(document_type=document_type, api_client=api_client)
85
+ if validations.success is False:
86
+ success = False
87
+
88
+ messages += validations.messages
89
+
90
+ return SuccessFailureMessageTuple(success, messages)
91
+
92
+ def perform_all_validations(
93
+ self, document_type: type["Document"], api_client: "MarklogicApiClient"
94
+ ) -> SuccessFailureMessageTuple:
95
+ """Perform all possible identifier validations on this collection, both at the individual and collection level."""
96
+
97
+ identifier_level_success, identifier_level_messages = self._perform_identifier_level_validations(
98
+ document_type=document_type, api_client=api_client
99
+ )
100
+ collection_level_success, collection_level_messages = self._perform_collection_level_validations()
101
+
102
+ success = all([identifier_level_success, collection_level_success])
103
+ all_messages = identifier_level_messages + collection_level_messages
104
+
105
+ return SuccessFailureMessageTuple(success, all_messages)
106
+
107
+ def contains(self, other_identifier: Identifier) -> bool:
108
+ """Does the identifier's value and namespace already exist in this group?"""
109
+ return any(other_identifier.same_as(identifier) for identifier in self.values())
110
+
111
+ def add(self, identifier: Identifier) -> None:
112
+ if not self.contains(identifier):
113
+ self[identifier.uuid] = identifier
114
+
115
+ def valid_new_identifier_types(self, document_type: type["Document"]) -> list[type[Identifier]]:
116
+ """Return a list of identifier types which can be added to a document of the given type, given identifiers already in this collection."""
117
+ return [
118
+ t
119
+ for t in SUPPORTED_IDENTIFIER_TYPES
120
+ if t.schema.allow_editing
121
+ and (not t.schema.document_types or document_type.__name__ in t.schema.document_types)
122
+ ]
123
+
124
+ def __delitem__(self, key: Union[Identifier, str]) -> None:
125
+ if isinstance(key, Identifier):
126
+ super().__delitem__(key.uuid)
127
+ else:
128
+ super().__delitem__(key)
129
+
130
+ def of_type(self, identifier_type: type[Identifier]) -> list[Identifier]:
131
+ """Return a list of all identifiers of a given type."""
132
+ uuids = self.keys()
133
+ return [self[uuid] for uuid in list(uuids) if isinstance(self[uuid], identifier_type)]
134
+
135
+ def delete_type(self, deleted_identifier_type: type[Identifier]) -> None:
136
+ "For when we want an identifier to be the only valid identifier of that type, delete the others first"
137
+ uuids = self.keys()
138
+ for uuid in list(uuids):
139
+ # we could use compare to .schema instead, which would have diffferent behaviour for subclasses
140
+ if isinstance(self[uuid], deleted_identifier_type):
141
+ del self[uuid]
142
+
143
+ @property
144
+ def as_etree(self) -> etree._Element:
145
+ """Return an etree representation of all the Document's identifiers."""
146
+ identifiers_root = etree.Element("identifiers")
147
+
148
+ for identifier in self.values():
149
+ identifiers_root.append(identifier.as_xml_tree)
150
+
151
+ return identifiers_root
152
+
153
+ def by_score(self, type: Optional[type[Identifier]] = None) -> list[Identifier]:
154
+ """
155
+ :param type: Optionally, an identifier type to constrain this list to.
156
+
157
+ :return: Return a list of identifiers, sorted by their score in descending order.
158
+ """
159
+ identifiers = self.of_type(type) if type else list(self.values())
160
+ return sorted(identifiers, key=lambda v: v.score, reverse=True)
161
+
162
+ def preferred(self, type: Optional[type[Identifier]] = None) -> Optional[Identifier]:
163
+ """
164
+ :param type: Optionally, an identifier type to constrain the results to.
165
+
166
+ :return: Return the highest scoring identifier of the given type (or of any type, if none is specified). Returns `None` if no identifier is available.
167
+ """
168
+ if len(self.by_score(type)) == 0:
169
+ return None
170
+ return self.by_score(type)[0]
@@ -2,9 +2,5 @@ class InvalidIdentifierXMLRepresentationException(Exception):
2
2
  pass
3
3
 
4
4
 
5
- class UUIDMismatchError(Exception):
6
- pass
7
-
8
-
9
5
  class IdentifierValidationException(Exception):
10
6
  pass
@@ -24,6 +24,7 @@ class Judgment(NeutralCitationMixin, Document):
24
24
  document_noun = "judgment"
25
25
  document_noun_plural = "judgments"
26
26
  type_collection_name = "judgment"
27
+ _default_reparse_document_type = "judgment"
27
28
 
28
29
  def __init__(self, uri: DocumentURIString, *args: Any, **kwargs: Any) -> None:
29
30
  super().__init__(self.document_noun, uri, *args, **kwargs)
@@ -11,3 +11,4 @@ class ParserLog(Document):
11
11
  document_noun = "parser log"
12
12
  document_noun_plural = "parser logs"
13
13
  type_collection_name = "parser-log"
14
+ _default_reparse_document_type = "parserLog"
@@ -25,6 +25,7 @@ class PressSummary(NeutralCitationMixin, Document):
25
25
  document_noun = "press summary"
26
26
  document_noun_plural = "press summaries"
27
27
  type_collection_name = "press-summary"
28
+ _default_reparse_document_type = "pressSummary"
28
29
 
29
30
  def __init__(self, uri: DocumentURIString, *args: Any, **kwargs: Any) -> None:
30
31
  super().__init__(self.document_noun, uri, *args, **kwargs)
@@ -1,6 +1,9 @@
1
1
  <?xml version="1.0"?>
2
- <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:uk='https://caselaw.nationalarchives.gov.uk/akn' xmlns:akn='http://docs.oasis-open.org/legaldocml/ns/akn/3.0'>
3
-
2
+ <xsl:stylesheet version="1.0"
3
+ xmlns='http://docs.oasis-open.org/legaldocml/ns/akn/3.0'
4
+ xmlns:akn='http://docs.oasis-open.org/legaldocml/ns/akn/3.0'
5
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
6
+ xmlns:uk='https://caselaw.nationalarchives.gov.uk/akn'>
4
7
  <xsl:param name="work_uri" />
5
8
  <xsl:param name="expression_uri" />
6
9
  <xsl:param name="manifestation_uri" />
@@ -16,51 +19,51 @@
16
19
  <!-- <xsl:template match="akn:identification/FRBRWork/FRBRthistext/text()"><xsl:copy-of select="$cat" /></xsl:template> -->
17
20
 
18
21
  <xsl:template match="akn:identification/akn:FRBRWork/akn:FRBRthis">
19
- <akn:FRBRthis>
22
+ <FRBRthis>
20
23
  <xsl:attribute name="value">
21
24
  <xsl:value-of select="$work_uri" />
22
25
  </xsl:attribute>
23
- </akn:FRBRthis>
26
+ </FRBRthis>
24
27
  </xsl:template>
25
28
 
26
29
  <xsl:template match="akn:identification/akn:FRBRWork/akn:FRBRuri">
27
- <akn:FRBRuri>
30
+ <FRBRuri>
28
31
  <xsl:attribute name="value">
29
32
  <xsl:value-of select="$work_uri" />
30
33
  </xsl:attribute>
31
- </akn:FRBRuri>
34
+ </FRBRuri>
32
35
  </xsl:template>
33
36
 
34
37
  <xsl:template match="akn:identification/akn:FRBRExpression/akn:FRBRthis">
35
- <akn:FRBRthis>
38
+ <FRBRthis>
36
39
  <xsl:attribute name="value">
37
40
  <xsl:value-of select="$expression_uri" />
38
41
  </xsl:attribute>
39
- </akn:FRBRthis>
42
+ </FRBRthis>
40
43
  </xsl:template>
41
44
 
42
45
  <xsl:template match="akn:identification/akn:FRBRExpression/akn:FRBRuri">
43
- <akn:FRBRuri>
46
+ <FRBRuri>
44
47
  <xsl:attribute name="value">
45
48
  <xsl:value-of select="$expression_uri" />
46
49
  </xsl:attribute>
47
- </akn:FRBRuri>
50
+ </FRBRuri>
48
51
  </xsl:template>
49
52
 
50
53
  <xsl:template match="akn:identification/akn:FRBRManifestation/akn:FRBRthis">
51
- <akn:FRBRthis>
54
+ <FRBRthis>
52
55
  <xsl:attribute name="value">
53
56
  <xsl:value-of select="$manifestation_uri" />
54
57
  </xsl:attribute>
55
- </akn:FRBRthis>
58
+ </FRBRthis>
56
59
  </xsl:template>
57
60
 
58
61
  <xsl:template match="akn:identification/akn:FRBRManifestation/akn:FRBRuri">
59
- <akn:FRBRuri>
62
+ <FRBRuri>
60
63
  <xsl:attribute name="value">
61
64
  <xsl:value-of select="$manifestation_uri" />
62
65
  </xsl:attribute>
63
- </akn:FRBRuri>
66
+ </FRBRuri>
64
67
  </xsl:template>
65
68
 
66
69
 
@@ -1,102 +0,0 @@
1
- from typing import TYPE_CHECKING, Optional, Union
2
-
3
- from lxml import etree
4
-
5
- from caselawclient.types import SuccessFailureMessageTuple
6
-
7
- from . import Identifier
8
- from .exceptions import UUIDMismatchError
9
- from .fclid import FindCaseLawIdentifier
10
- from .neutral_citation import NeutralCitationNumber
11
- from .press_summary_ncn import PressSummaryRelatedNCNIdentifier
12
-
13
- if TYPE_CHECKING:
14
- from caselawclient.Client import MarklogicApiClient
15
- from caselawclient.models.documents import Document
16
-
17
- SUPPORTED_IDENTIFIER_TYPES: list[type["Identifier"]] = [
18
- FindCaseLawIdentifier,
19
- NeutralCitationNumber,
20
- PressSummaryRelatedNCNIdentifier,
21
- ]
22
-
23
-
24
- class IdentifiersCollection(dict[str, Identifier]):
25
- def validate_uuids_match_keys(self) -> None:
26
- for uuid, identifier in self.items():
27
- if uuid != identifier.uuid:
28
- msg = "Key of {identifier} in Identifiers is {uuid} not {identifier.uuid}"
29
- raise UUIDMismatchError(msg)
30
-
31
- def perform_all_validations(
32
- self, document_type: type["Document"], api_client: "MarklogicApiClient"
33
- ) -> SuccessFailureMessageTuple:
34
- self.validate_uuids_match_keys()
35
-
36
- success = True
37
- messages: list[str] = []
38
-
39
- for _, identifier in self.items():
40
- validations = identifier.perform_all_validations(document_type=document_type, api_client=api_client)
41
- if validations.success is False:
42
- success = False
43
-
44
- messages += validations.messages
45
-
46
- return SuccessFailureMessageTuple(success, messages)
47
-
48
- def contains(self, other_identifier: Identifier) -> bool:
49
- "Do the identifier's value and namespace already exist in this group?"
50
- return any(other_identifier.same_as(identifier) for identifier in self.values())
51
-
52
- def add(self, identifier: Identifier) -> None:
53
- if not self.contains(identifier):
54
- self[identifier.uuid] = identifier
55
-
56
- def __delitem__(self, key: Union[Identifier, str]) -> None:
57
- if isinstance(key, Identifier):
58
- super().__delitem__(key.uuid)
59
- else:
60
- super().__delitem__(key)
61
-
62
- def of_type(self, identifier_type: type[Identifier]) -> list[Identifier]:
63
- """Return a list of all identifiers of a given type."""
64
- uuids = self.keys()
65
- return [self[uuid] for uuid in list(uuids) if isinstance(self[uuid], identifier_type)]
66
-
67
- def delete_type(self, deleted_identifier_type: type[Identifier]) -> None:
68
- "For when we want an identifier to be the only valid identifier of that type, delete the others first"
69
- uuids = self.keys()
70
- for uuid in list(uuids):
71
- # we could use compare to .schema instead, which would have diffferent behaviour for subclasses
72
- if isinstance(self[uuid], deleted_identifier_type):
73
- del self[uuid]
74
-
75
- @property
76
- def as_etree(self) -> etree._Element:
77
- """Return an etree representation of all the Document's identifiers."""
78
- identifiers_root = etree.Element("identifiers")
79
-
80
- for identifier in self.values():
81
- identifiers_root.append(identifier.as_xml_tree)
82
-
83
- return identifiers_root
84
-
85
- def by_score(self, type: Optional[type[Identifier]] = None) -> list[Identifier]:
86
- """
87
- :param type: Optionally, an identifier type to constrain this list to.
88
-
89
- :return: Return a list of identifiers, sorted by their score in descending order.
90
- """
91
- identifiers = self.of_type(type) if type else list(self.values())
92
- return sorted(identifiers, key=lambda v: v.score, reverse=True)
93
-
94
- def preferred(self, type: Optional[type[Identifier]] = None) -> Optional[Identifier]:
95
- """
96
- :param type: Optionally, an identifier type to constrain the results to.
97
-
98
- :return: Return the highest scoring identifier of the given type (or of any type, if none is specified). Returns `None` if no identifier is available.
99
- """
100
- if len(self.by_score(type)) == 0:
101
- return None
102
- return self.by_score(type)[0]