ds-caselaw-marklogic-api-client 29.0.0a2__tar.gz → 29.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ds-caselaw-marklogic-api-client might be problematic. Click here for more details.

Files changed (84)
  1. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/PKG-INFO +3 -2
  2. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/pyproject.toml +3 -2
  3. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/Client.py +20 -0
  4. ds_caselaw_marklogic_api_client-29.1.0/src/caselawclient/identifier_resolution.py +43 -0
  5. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/models/documents/__init__.py +23 -8
  6. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/models/identifiers/__init__.py +57 -6
  7. ds_caselaw_marklogic_api_client-29.1.0/src/caselawclient/models/identifiers/fclid.py +50 -0
  8. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/models/identifiers/neutral_citation.py +2 -0
  9. ds_caselaw_marklogic_api_client-29.1.0/src/caselawclient/models/identifiers/press_summary_ncn.py +20 -0
  10. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/models/identifiers/unpacker.py +12 -3
  11. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/models/judgments.py +10 -13
  12. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/models/neutral_citation_mixin.py +8 -4
  13. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/models/press_summaries.py +9 -12
  14. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/models/utilities/aws.py +0 -2
  15. ds_caselaw_marklogic_api_client-29.1.0/src/caselawclient/xquery/get_next_document_sequence_number.xqy +14 -0
  16. ds_caselaw_marklogic_api_client-29.1.0/src/caselawclient/xquery/resolve_from_identifier.xqy +17 -0
  17. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery_type_dicts.py +6 -0
  18. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/LICENSE.md +0 -0
  19. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/README.md +0 -0
  20. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/__init__.py +0 -0
  21. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/client_helpers/__init__.py +0 -0
  22. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/client_helpers/search_helpers.py +0 -0
  23. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/content_hash.py +0 -0
  24. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/errors.py +0 -0
  25. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/factories.py +0 -0
  26. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/models/__init__.py +0 -0
  27. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/models/documents/body.py +0 -0
  28. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/models/documents/exceptions.py +0 -0
  29. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/models/documents/statuses.py +0 -0
  30. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/models/documents/transforms/html.xsl +0 -0
  31. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/models/documents/xml.py +0 -0
  32. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/models/utilities/__init__.py +0 -0
  33. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/models/utilities/dates.py +0 -0
  34. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/models/utilities/move.py +0 -0
  35. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/py.typed +0 -0
  36. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/responses/__init__.py +0 -0
  37. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/responses/search_response.py +0 -0
  38. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/responses/search_result.py +0 -0
  39. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/responses/xsl/search_match.xsl +0 -0
  40. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/search_parameters.py +0 -0
  41. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xml_helpers.py +0 -0
  42. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/break_judgment_checkout.xqy +0 -0
  43. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/checkin_judgment.xqy +0 -0
  44. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/checkout_judgment.xqy +0 -0
  45. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/copy_document.xqy +0 -0
  46. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/delete_judgment.xqy +0 -0
  47. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/document_collections.xqy +0 -0
  48. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/document_exists.xqy +0 -0
  49. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/get_combined_stats_table.xqy +0 -0
  50. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/get_components_for_document.xqy +0 -0
  51. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/get_highest_enrichment_version.xqy +0 -0
  52. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/get_highest_parser_version.xqy +0 -0
  53. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/get_judgment.xqy +0 -0
  54. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/get_judgment_checkout_status.xqy +0 -0
  55. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/get_judgment_version.xqy +0 -0
  56. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/get_last_modified.xqy +0 -0
  57. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy +0 -0
  58. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/get_pending_parse_for_version.xqy +0 -0
  59. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/get_properties_for_search_results.xqy +0 -0
  60. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/get_property.xqy +0 -0
  61. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/get_property_as_node.xqy +0 -0
  62. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/get_recently_enriched.xqy +0 -0
  63. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/get_recently_parsed.xqy +0 -0
  64. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/get_version_annotation.xqy +0 -0
  65. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/get_version_created.xqy +0 -0
  66. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/insert_document.xqy +0 -0
  67. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/list_judgment_versions.xqy +0 -0
  68. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/set_boolean_property.xqy +0 -0
  69. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/set_metadata_citation.xqy +0 -0
  70. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/set_metadata_court.xqy +0 -0
  71. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/set_metadata_jurisdiction.xqy +0 -0
  72. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/set_metadata_name.xqy +0 -0
  73. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/set_metadata_this_uri.xqy +0 -0
  74. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/set_metadata_work_expression_date.xqy +0 -0
  75. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/set_property.xqy +0 -0
  76. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/set_property_as_node.xqy +0 -0
  77. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/update_document.xqy +0 -0
  78. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/update_locked_judgment.xqy +0 -0
  79. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/user_has_privilege.xqy +0 -0
  80. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/user_has_role.xqy +0 -0
  81. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/validate_all_documents.xqy +0 -0
  82. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/validate_document.xqy +0 -0
  83. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/xslt.xqy +0 -0
  84. {ds_caselaw_marklogic_api_client-29.0.0a2 → ds_caselaw_marklogic_api_client-29.1.0}/src/caselawclient/xquery/xslt_transform.xqy +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ds-caselaw-marklogic-api-client
3
- Version: 29.0.0a2
3
+ Version: 29.1.0
4
4
  Summary: An API client for interacting with the underlying data in Find Caselaw.
5
5
  Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
6
6
  Keywords: national archives,caselaw
@@ -11,7 +11,7 @@ Classifier: Programming Language :: Python :: 3.9
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
13
  Requires-Dist: boto3 (>=1.26.112,<2.0.0)
14
- Requires-Dist: certifi (>=2024.8.30,<2024.9.0)
14
+ Requires-Dist: certifi (>=2024.12.14,<2024.13.0)
15
15
  Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
16
16
  Requires-Dist: django-environ (>=0.11.0,<0.12.0)
17
17
  Requires-Dist: ds-caselaw-utils (>=2.0.0,<3.0.0)
@@ -25,6 +25,7 @@ Requires-Dist: pytz (>=2024.1,<2025.0)
25
25
  Requires-Dist: requests (>=2.28.2,<3.0.0)
26
26
  Requires-Dist: requests-toolbelt (>=0.10.1,<1.1.0)
27
27
  Requires-Dist: saxonche (>=12.5.0,<13.0.0)
28
+ Requires-Dist: sqids (>=0.5.0,<0.6.0)
28
29
  Requires-Dist: typing-extensions (>=4.7.1,<5.0.0)
29
30
  Description-Content-Type: text/markdown
30
31
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ds-caselaw-marklogic-api-client"
3
- version = "29.0.0-alpha.2"
3
+ version = "29.1.0"
4
4
  description = "An API client for interacting with the underlying data in Find Caselaw."
5
5
  authors = ["The National Archives"]
6
6
  homepage = "https://github.com/nationalarchives/ds-caselaw-custom-api-client"
@@ -12,7 +12,7 @@ packages = [
12
12
 
13
13
  [tool.poetry.dependencies]
14
14
  python = "^3.9"
15
- certifi = ">=2024.8.30,<2024.9.0"
15
+ certifi = ">=2024.12.14,<2024.13.0"
16
16
  charset-normalizer = "^3.0.0"
17
17
  django-environ = "^0.11.0"
18
18
  idna = "^3.4"
@@ -28,6 +28,7 @@ mypy-boto3-sns = "^1.26.69"
28
28
  pytz = "^2024.1"
29
29
  python-dateutil = "^2.9.0-post.0"
30
30
  saxonche = "^12.5.0"
31
+ sqids = "^0.5.0"
31
32
 
32
33
  [tool.poetry.group.dev.dependencies]
33
34
  coverage = "^7.2.3"
@@ -20,6 +20,7 @@ from requests_toolbelt.multipart import decoder
20
20
 
21
21
  from caselawclient import xquery_type_dicts as query_dicts
22
22
  from caselawclient.client_helpers import VersionAnnotation
23
+ from caselawclient.identifier_resolution import IdentifierResolutions
23
24
  from caselawclient.models.documents import (
24
25
  DOCUMENT_COLLECTION_URI_JUDGMENT,
25
26
  DOCUMENT_COLLECTION_URI_PRESS_SUMMARY,
@@ -1201,3 +1202,22 @@ class MarklogicApiClient:
1201
1202
  )
1202
1203
 
1203
1204
  return results
1205
+
1206
+ def resolve_from_identifier(self, identifier_uri: str, published_only: bool = True) -> IdentifierResolutions:
1207
+ """Given a PUI/EUI url, look up the precomputed slug and return the
1208
+ MarkLogic document URIs which match that slug. Multiple returns should be anticipated"""
1209
+ vars: query_dicts.ResolveFromIdentifierDict = {
1210
+ "identifier_uri": DocumentURIString(identifier_uri),
1211
+ "published_only": int(published_only),
1212
+ }
1213
+ raw_results: list[str] = get_multipart_strings_from_marklogic_response(
1214
+ self._send_to_eval(
1215
+ vars,
1216
+ "resolve_from_identifier.xqy",
1217
+ ),
1218
+ )
1219
+ return IdentifierResolutions.from_marklogic_output(raw_results)
1220
+
1221
+ def get_next_document_sequence_number(self) -> int:
1222
+ """Increment the MarkLogic sequence number by one and return the value."""
1223
+ return int(self._eval_and_decode({}, "get_next_document_sequence_number.xqy"))
@@ -0,0 +1,43 @@
1
+ import json
2
+ from typing import NamedTuple
3
+
4
+ from caselawclient.models.documents import DocumentURIString
5
+ from caselawclient.xquery_type_dicts import MarkLogicDocumentURIString
6
+
7
+
8
+ class IdentifierResolutions(list["IdentifierResolution"]):
9
+ """
10
+ A list of candidate MarkLogic documents which correspond to a Public UI uri
11
+
12
+ MarkLogic returns a list of dictionaries; IdentifierResolution handles a single dictionary
13
+ which corresponds to a single identifier to MarkLogic document mapping.
14
+
15
+ see `xquery/resolve_from_identifier.xqy` and `resolve_from_identifier` in `Client.py`
16
+ """
17
+
18
+ @staticmethod
19
+ def from_marklogic_output(table: list[str]) -> "IdentifierResolutions":
20
+ return IdentifierResolutions(list(IdentifierResolution.from_marklogic_output(row) for row in table))
21
+
22
+ def published(self) -> "IdentifierResolutions":
23
+ "Filter the list so that only published documents are returned"
24
+ return IdentifierResolutions(list(x for x in self if x.document_published))
25
+
26
+
27
+ class IdentifierResolution(NamedTuple):
28
+ """A single response from MarkLogic about a single identifier / document mapping"""
29
+
30
+ identifier_uuid: str
31
+ document_uri: MarkLogicDocumentURIString
32
+ identifier_slug: DocumentURIString
33
+ document_published: bool
34
+
35
+ @staticmethod
36
+ def from_marklogic_output(raw_row: str) -> "IdentifierResolution":
37
+ row = json.loads(raw_row)
38
+ return IdentifierResolution(
39
+ identifier_uuid=row["documents.compiled_url_slugs.identifier_uuid"],
40
+ document_uri=MarkLogicDocumentURIString(row["documents.compiled_url_slugs.document_uri"]),
41
+ identifier_slug=DocumentURIString(row["documents.compiled_url_slugs.identifier_slug"]),
42
+ document_published=row["documents.compiled_url_slugs.document_published"] == "true",
43
+ )
@@ -15,6 +15,8 @@ from caselawclient.errors import (
15
15
  NotSupportedOnVersion,
16
16
  OnlySupportedOnVersion,
17
17
  )
18
+ from caselawclient.models.identifiers import Identifier
19
+ from caselawclient.models.identifiers.fclid import FindCaseLawIdentifier, FindCaseLawIdentifierSchema
18
20
  from caselawclient.models.identifiers.unpacker import unpack_all_identifiers_from_etree
19
21
  from caselawclient.models.utilities import VersionsDict, extract_version, render_versions
20
22
  from caselawclient.models.utilities.aws import (
@@ -170,13 +172,11 @@ class Document:
170
172
  self.identifiers = unpack_all_identifiers_from_etree(identifiers_element_as_etree)
171
173
 
172
174
  @property
173
- def best_human_identifier(self) -> Optional[str]:
174
- """
175
- Some identifier that is understood by legal professionals to refer to this legal event
176
- that is not the name of the document.
177
- Typically, this will be the neutral citation number, should it exist.
178
- Should typically be overridden in subclasses.
179
- """
175
+ def best_human_identifier(self) -> Optional[Identifier]:
176
+ """Return the preferred identifier for the document, providing that it is considered human readable."""
177
+ preferred_identifier = self.identifiers.preferred()
178
+ if preferred_identifier and preferred_identifier.schema.human_readable:
179
+ return preferred_identifier
180
180
  return None
181
181
 
182
182
  @property
@@ -432,6 +432,12 @@ class Document:
432
432
  if not self.is_publishable:
433
433
  raise CannotPublishUnpublishableDocument
434
434
 
435
+ ## If it doesn't already have one, get a new FCLID for this document and assign
436
+ if len(self.identifiers.of_type(FindCaseLawIdentifier)) < 1:
437
+ document_fclid = FindCaseLawIdentifierSchema.mint(self.api_client)
438
+ self.identifiers.add(document_fclid)
439
+ self.save_identifiers()
440
+
435
441
  publish_documents(uri_for_s3(self.uri))
436
442
  self.api_client.set_published(self.uri, True)
437
443
  announce_document_event(
@@ -500,13 +506,17 @@ class Document:
500
506
  "documentType": parser_type_noun,
501
507
  "metadata": {
502
508
  "name": self.body.name or None,
503
- "cite": self.best_human_identifier or None,
509
+ "cite": None,
504
510
  "court": self.body.court or None,
505
511
  "date": checked_date,
506
512
  "uri": self.uri,
507
513
  },
508
514
  }
509
515
 
516
+ ## TODO: Remove this hack around the fact that NCNs are assumed to be present for all documents' metadata, but actually different document classes may have different metadata
517
+ if hasattr(self, "neutral_citation"):
518
+ parser_instructions["metadata"]["cite"] = self.neutral_citation
519
+
510
520
  request_parse(
511
521
  uri=self.uri,
512
522
  reference=self.consignment_reference,
@@ -530,6 +540,11 @@ class Document:
530
540
  """
531
541
  return self.docx_exists()
532
542
 
543
+ def save_identifiers(self) -> None:
544
+ """Save the current state of this Document's identifiers to MarkLogic."""
545
+ self.identifiers.validate()
546
+ self.api_client.set_property_as_node(self.uri, "identifiers", self.identifiers.as_etree)
547
+
533
548
  def __getattr__(self, name: str) -> Any:
534
549
  warnings.warn(f"{name} no longer exists on Document, using Document.body instead", DeprecationWarning)
535
550
  try:
@@ -32,13 +32,20 @@ class IdentifierSchema(ABC):
32
32
  name: str
33
33
  namespace: str
34
34
 
35
+ human_readable: bool
36
+ """ Should this identifier type be considered for display as a 'human readable' identifier? """
37
+
38
+ base_score_multiplier: float = 1.0
39
+ """ A multiplier used to adjust the relative ranking of this identifier when calculating preferred identifiers. """
40
+
35
41
  def __init_subclass__(cls: type["IdentifierSchema"], **kwargs: Any) -> None:
36
42
  """Ensure that subclasses have the required attributes set."""
37
43
  for required in (
38
44
  "name",
39
45
  "namespace",
46
+ "human_readable",
40
47
  ):
41
- if not getattr(cls, required, False):
48
+ if not hasattr(cls, required):
42
49
  raise NotImplementedError(f"Can't instantiate IdentifierSchema without {required} attribute.")
43
50
  super().__init_subclass__(**kwargs)
44
51
 
@@ -76,6 +83,9 @@ class Identifier(ABC):
76
83
  def __repr__(self) -> str:
77
84
  return f"<{self.schema.name} {self.value}: {self.uuid}>"
78
85
 
86
+ def __str__(self) -> str:
87
+ return self.value
88
+
79
89
  def __init__(self, value: str, uuid: Optional[str] = None) -> None:
80
90
  self.value = value
81
91
  if uuid:
@@ -101,6 +111,15 @@ class Identifier(ABC):
101
111
  def url_slug(self) -> str:
102
112
  return self.schema.compile_identifier_url_slug(self.value)
103
113
 
114
+ @property
115
+ def score(self) -> float:
116
+ """Return the score of this identifier, used to calculate the preferred identifier for a document."""
117
+ return 1 * self.schema.base_score_multiplier
118
+
119
+ def same_as(self, other: "Identifier") -> bool:
120
+ "Is this the same as another identifier (in value and schema)?"
121
+ return self.value == other.value and self.schema == other.schema
122
+
104
123
 
105
124
  class Identifiers(dict[str, Identifier]):
106
125
  def validate(self) -> None:
@@ -109,8 +128,13 @@ class Identifiers(dict[str, Identifier]):
109
128
  msg = "Key of {identifier} in Identifiers is {uuid} not {identifier.uuid}"
110
129
  raise UUIDMismatchError(msg)
111
130
 
131
+ def contains(self, other_identifier: Identifier) -> bool:
132
+ "Do the identifier's value and namespace already exist in this group?"
133
+ return any(other_identifier.same_as(identifier) for identifier in self.values())
134
+
112
135
  def add(self, identifier: Identifier) -> None:
113
- self[identifier.uuid] = identifier
136
+ if not self.contains(identifier):
137
+ self[identifier.uuid] = identifier
114
138
 
115
139
  def __delitem__(self, key: Union[Identifier, str]) -> None:
116
140
  if isinstance(key, Identifier):
@@ -118,6 +142,19 @@ class Identifiers(dict[str, Identifier]):
118
142
  else:
119
143
  super().__delitem__(key)
120
144
 
145
+ def of_type(self, identifier_type: type[Identifier]) -> list[Identifier]:
146
+ """Return a list of all identifiers of a given type."""
147
+ uuids = self.keys()
148
+ return [self[uuid] for uuid in list(uuids) if isinstance(self[uuid], identifier_type)]
149
+
150
+ def delete_type(self, deleted_identifier_type: type[Identifier]) -> None:
151
+ "For when we want an identifier to be the only valid identifier of that type, delete the others first"
152
+ uuids = self.keys()
153
+ for uuid in list(uuids):
154
+ # we could compare against .schema instead, which would have different behaviour for subclasses
155
+ if isinstance(self[uuid], deleted_identifier_type):
156
+ del self[uuid]
157
+
121
158
  @property
122
159
  def as_etree(self) -> etree._Element:
123
160
  """Return an etree representation of all the Document's identifiers."""
@@ -128,7 +165,21 @@ class Identifiers(dict[str, Identifier]):
128
165
 
129
166
  return identifiers_root
130
167
 
131
- def save(self, document) -> None: # type: ignore[no-untyped-def, unused-ignore]
132
- """Save the current state of this Document's identifiers to MarkLogic."""
133
- self.validate()
134
- document.api_client.set_property_as_node(document.uri, "identifiers", self.as_etree)
168
+ def by_score(self, type: Optional[type[Identifier]] = None) -> list[Identifier]:
169
+ """
170
+ :param type: Optionally, an identifier type to constrain this list to.
171
+
172
+ :return: Return a list of identifiers, sorted by their score in descending order.
173
+ """
174
+ identifiers = self.of_type(type) if type else list(self.values())
175
+ return sorted(identifiers, key=lambda v: v.score, reverse=True)
176
+
177
+ def preferred(self, type: Optional[type[Identifier]] = None) -> Optional[Identifier]:
178
+ """
179
+ :param type: Optionally, an identifier type to constrain the results to.
180
+
181
+ :return: Return the highest scoring identifier of the given type (or of any type, if none is specified). Returns `None` if no identifier is available.
182
+ """
183
+ if len(self.by_score(type)) == 0:
184
+ return None
185
+ return self.by_score(type)[0]
@@ -0,0 +1,50 @@
1
+ import re
2
+ from typing import TYPE_CHECKING
3
+
4
+ from sqids import Sqids
5
+
6
+ from . import Identifier, IdentifierSchema
7
+
8
+ if TYPE_CHECKING:
9
+ from caselawclient.Client import MarklogicApiClient
10
+
11
+
12
+ VALID_FCLID_PATTERN = re.compile(r"^[bcdfghjkmnpqrstvwxyz23456789]{4,}$")
13
+
14
+ FCLID_MINIMUM_LENGTH = 8
15
+ FCLID_ALPHABET = "bcdfghjkmnpqrstvwxyz23456789"
16
+
17
+ sqids = Sqids(
18
+ min_length=FCLID_MINIMUM_LENGTH,
19
+ alphabet=FCLID_ALPHABET,
20
+ )
21
+
22
+
23
+ class FindCaseLawIdentifierSchema(IdentifierSchema):
24
+ """
25
+ Identifier schema describing a Find Case Law Identifier.
26
+ """
27
+
28
+ name = "Find Case Law Identifier"
29
+ namespace = "fclid"
30
+ human_readable = False
31
+ base_score_multiplier = 0.8
32
+
33
+ @classmethod
34
+ def validate_identifier(cls, value: str) -> bool:
35
+ return bool(VALID_FCLID_PATTERN.match(value))
36
+
37
+ @classmethod
38
+ def compile_identifier_url_slug(cls, value: str) -> str:
39
+ return "tna." + value
40
+
41
+ @classmethod
42
+ def mint(cls, api_client: "MarklogicApiClient") -> "FindCaseLawIdentifier":
43
+ """Generate a totally new Find Case Law identifier."""
44
+ next_sequence_number = api_client.get_next_document_sequence_number()
45
+ new_identifier = sqids.encode([next_sequence_number])
46
+ return FindCaseLawIdentifier(value=new_identifier)
47
+
48
+
49
+ class FindCaseLawIdentifier(Identifier):
50
+ schema = FindCaseLawIdentifierSchema
@@ -30,6 +30,8 @@ class NeutralCitationNumberSchema(IdentifierSchema):
30
30
 
31
31
  name = "Neutral Citation Number"
32
32
  namespace = "ukncn"
33
+ human_readable = True
34
+ base_score_multiplier = 1.5
33
35
 
34
36
  @classmethod
35
37
  def validate_identifier(cls, value: str) -> bool:
@@ -0,0 +1,20 @@
1
+ from .neutral_citation import NeutralCitationNumber, NeutralCitationNumberSchema
2
+
3
+
4
+ class PressSummaryRelatedNCNIdentifierSchema(NeutralCitationNumberSchema):
5
+ """
6
+ Identifier schema for relating a Press Summary to a Judgment with a given NCN
7
+ """
8
+
9
+ name = "Press Summary relates to NCN"
10
+ namespace = "uksummaryofncn"
11
+ human_readable = True
12
+ base_score_multiplier = 0.5
13
+
14
+ @classmethod
15
+ def compile_identifier_url_slug(cls, value: str) -> str:
16
+ return super().compile_identifier_url_slug(value) + "/press-summary"
17
+
18
+
19
+ class PressSummaryRelatedNCNIdentifier(NeutralCitationNumber):
20
+ schema = NeutralCitationNumberSchema
@@ -1,11 +1,14 @@
1
1
  from typing import Optional
2
+ from warnings import warn
2
3
 
3
4
  from lxml import etree
4
5
 
5
6
  from . import IDENTIFIER_UNPACKABLE_ATTRIBUTES, Identifier, Identifiers, InvalidIdentifierXMLRepresentationException
7
+ from .fclid import FindCaseLawIdentifier
6
8
  from .neutral_citation import NeutralCitationNumber
7
9
 
8
10
  IDENTIFIER_NAMESPACE_MAP: dict[str, type[Identifier]] = {
11
+ "fclid": FindCaseLawIdentifier,
9
12
  "ukncn": NeutralCitationNumber,
10
13
  }
11
14
 
@@ -17,12 +20,13 @@ def unpack_all_identifiers_from_etree(identifiers_etree: Optional[etree._Element
17
20
  return identifiers
18
21
  for identifier_etree in identifiers_etree.findall("identifier"):
19
22
  identifier = unpack_an_identifier_from_etree(identifier_etree)
20
- identifiers.add(identifier)
23
+ if identifier:
24
+ identifiers.add(identifier)
21
25
  return identifiers
22
26
 
23
27
 
24
- def unpack_an_identifier_from_etree(identifier_xml: etree._Element) -> Identifier:
25
- """Given an etree representation of a single identifier, unpack it into an appropriate instance of an Identifier."""
28
+ def unpack_an_identifier_from_etree(identifier_xml: etree._Element) -> Optional[Identifier]:
29
+ """Given an etree representation of a single identifier, unpack it into an appropriate instance of an Identifier if the type is known (otherwise return `None`)."""
26
30
 
27
31
  namespace_element = identifier_xml.find("namespace")
28
32
 
@@ -31,6 +35,11 @@ def unpack_an_identifier_from_etree(identifier_xml: etree._Element) -> Identifie
31
35
  "Identifer XML representation is not valid: namespace not present or empty"
32
36
  )
33
37
 
38
+ # If the identifier namespace isn't known, fail out
39
+ if namespace_element.text not in IDENTIFIER_NAMESPACE_MAP:
40
+ warn(f"Identifier type {namespace_element.text} is not known.")
41
+ return None
42
+
34
43
  kwargs: dict[str, str] = {}
35
44
 
36
45
  for attribute in IDENTIFIER_UNPACKABLE_ATTRIBUTES:
@@ -25,20 +25,17 @@ class Judgment(NeutralCitationMixin, Document):
25
25
  super().__init__(self.document_noun, uri, *args, **kwargs)
26
26
 
27
27
  @cached_property
28
- def neutral_citation(self) -> NeutralCitationString:
29
- return NeutralCitationString(
30
- self.body.get_xpath_match_string(
31
- "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:cite/text()",
32
- {
33
- "uk": "https://caselaw.nationalarchives.gov.uk/akn",
34
- "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
35
- },
36
- )
28
+ def neutral_citation(self) -> Optional[NeutralCitationString]:
29
+ value_in_xml = self.body.get_xpath_match_string(
30
+ "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:cite/text()",
31
+ {
32
+ "uk": "https://caselaw.nationalarchives.gov.uk/akn",
33
+ "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
34
+ },
37
35
  )
38
-
39
- @property
40
- def best_human_identifier(self) -> str:
41
- return self.neutral_citation
36
+ if value_in_xml:
37
+ return NeutralCitationString(value_in_xml)
38
+ return None
42
39
 
43
40
  @cached_property
44
41
  def linked_document(self) -> Optional["PressSummary"]:
@@ -1,9 +1,10 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from functools import cached_property
3
- from typing import Any
3
+ from typing import Any, Optional
4
4
 
5
5
  from ds_caselaw_utils import neutral_url
6
6
  from ds_caselaw_utils.types import NeutralCitationString
7
+ from typing_extensions import deprecated
7
8
 
8
9
 
9
10
  class NeutralCitationMixin(ABC):
@@ -38,12 +39,15 @@ class NeutralCitationMixin(ABC):
38
39
 
39
40
  @cached_property
40
41
  @abstractmethod
41
- def neutral_citation(self) -> NeutralCitationString: ...
42
+ @deprecated("Legacy usage of NCNs is deprecated; you should be moving to the Identifiers framework")
43
+ def neutral_citation(self) -> Optional[NeutralCitationString]: ...
42
44
 
43
45
  @cached_property
46
+ @deprecated("Legacy usage of NCNs is deprecated; you should be moving to the Identifiers framework")
44
47
  def has_ncn(self) -> bool:
45
- return bool(self.neutral_citation)
48
+ return self.neutral_citation is not None and self.neutral_citation != ""
46
49
 
47
50
  @cached_property
51
+ @deprecated("Legacy usage of NCNs is deprecated; you should be moving to the Identifiers framework")
48
52
  def has_valid_ncn(self) -> bool:
49
- return self.has_ncn and neutral_url(self.neutral_citation) is not None
53
+ return self.neutral_citation is not None and neutral_url(self.neutral_citation) is not None
@@ -27,19 +27,16 @@ class PressSummary(NeutralCitationMixin, Document):
27
27
  super().__init__(self.document_noun, uri, *args, **kwargs)
28
28
 
29
29
  @cached_property
30
- def neutral_citation(self) -> NeutralCitationString:
31
- return NeutralCitationString(
32
- self.body.get_xpath_match_string(
33
- "/akn:akomaNtoso/akn:doc/akn:preface/akn:p/akn:neutralCitation/text()",
34
- {
35
- "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
36
- },
37
- )
30
+ def neutral_citation(self) -> Optional[NeutralCitationString]:
31
+ value_in_xml = self.body.get_xpath_match_string(
32
+ "/akn:akomaNtoso/akn:doc/akn:preface/akn:p/akn:neutralCitation/text()",
33
+ {
34
+ "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
35
+ },
38
36
  )
39
-
40
- @property
41
- def best_human_identifier(self) -> str:
42
- return self.neutral_citation
37
+ if value_in_xml:
38
+ return NeutralCitationString(value_in_xml)
39
+ return None
43
40
 
44
41
  @cached_property
45
42
  def linked_document(self) -> Optional[Judgment]:
@@ -251,8 +251,6 @@ def request_parse(
251
251
  },
252
252
  }
253
253
 
254
- # breakpoint()
255
-
256
254
  client.publish(
257
255
  TopicArn=env("REPARSE_SNS_TOPIC"),
258
256
  Message=json.dumps(message_to_send),
@@ -0,0 +1,14 @@
1
+ xquery version "1.0-ml";
2
+ declare option xdmp:transaction-mode "update";
3
+
4
+ let $_ := xdmp:set-transaction-mode("update")
5
+ let $state_doc := fn:doc("state.xml")
6
+ let $counter_node := $state_doc/state/document_counter
7
+
8
+ let $current_counter := $counter_node/text()
9
+ let $new_counter := fn:sum(($current_counter, 1))
10
+
11
+ let $_ := xdmp:node-replace($counter_node, <document_counter>{$new_counter}</document_counter>)
12
+ let $_ := xdmp:commit()
13
+
14
+ return $new_counter
@@ -0,0 +1,17 @@
1
+ xquery version "1.0-ml";
2
+
3
+ declare namespace xdmp="http://marklogic.com/xdmp";
4
+ declare variable $identifier_uri as xs:string external;
5
+ declare variable $published_only as xs:int? external := 1;
6
+
7
+ let $published_query := if ($published_only) then " AND document_published = 'true'" else ""
8
+ let $query := "SELECT * from compiled_url_slugs WHERE (identifier_slug = @uri)" || $published_query
9
+
10
+ return xdmp:sql(
11
+ $query,
12
+ "map",
13
+ map:new((
14
+ map:entry("uri", $identifier_uri)
15
+ ))
16
+ )
17
+
@@ -141,6 +141,12 @@ class ListJudgmentVersionsDict(MarkLogicAPIDict):
141
141
  uri: MarkLogicDocumentURIString
142
142
 
143
143
 
144
+ # resolve_from_identifier.xqy
145
+ class ResolveFromIdentifierDict(MarkLogicAPIDict):
146
+ identifier_uri: DocumentURIString
147
+ published_only: Optional[int]
148
+
149
+
144
150
  # set_boolean_property.xqy
145
151
  class SetBooleanPropertyDict(MarkLogicAPIDict):
146
152
  name: str