ds-caselaw-marklogic-api-client 28.2.0__py3-none-any.whl → 29.0.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ds-caselaw-marklogic-api-client might be problematic. Click here for more details.

caselawclient/Client.py CHANGED
@@ -20,7 +20,6 @@ from requests_toolbelt.multipart import decoder
20
20
 
21
21
  from caselawclient import xquery_type_dicts as query_dicts
22
22
  from caselawclient.client_helpers import VersionAnnotation
23
- from caselawclient.identifier_resolution import IdentifierResolutions
24
23
  from caselawclient.models.documents import (
25
24
  DOCUMENT_COLLECTION_URI_JUDGMENT,
26
25
  DOCUMENT_COLLECTION_URI_PRESS_SUMMARY,
@@ -1202,22 +1201,3 @@ class MarklogicApiClient:
1202
1201
  )
1203
1202
 
1204
1203
  return results
1205
-
1206
- def resolve_from_identifier(self, identifier_uri: str, published_only: bool = True) -> IdentifierResolutions:
1207
- """Given a PUI/EUI url, look up the precomputed slug and return the
1208
- MarkLogic document URIs which match that slug. Multiple returns should be anticipated"""
1209
- vars: query_dicts.ResolveFromIdentifierDict = {
1210
- "identifier_uri": DocumentURIString(identifier_uri),
1211
- "published_only": int(published_only),
1212
- }
1213
- raw_results: list[str] = get_multipart_strings_from_marklogic_response(
1214
- self._send_to_eval(
1215
- vars,
1216
- "resolve_from_identifier.xqy",
1217
- ),
1218
- )
1219
- return IdentifierResolutions.from_marklogic_output(raw_results)
1220
-
1221
- def get_next_document_sequence_number(self) -> int:
1222
- """Increment the MarkLogic sequence number by one and return the value."""
1223
- return int(self._eval_and_decode({}, "get_next_document_sequence_number.xqy"))
@@ -15,7 +15,6 @@ from caselawclient.errors import (
15
15
  NotSupportedOnVersion,
16
16
  OnlySupportedOnVersion,
17
17
  )
18
- from caselawclient.models.identifiers.fclid import FindCaseLawIdentifier, FindCaseLawIdentifierSchema
19
18
  from caselawclient.models.identifiers.unpacker import unpack_all_identifiers_from_etree
20
19
  from caselawclient.models.utilities import VersionsDict, extract_version, render_versions
21
20
  from caselawclient.models.utilities.aws import (
@@ -433,12 +432,6 @@ class Document:
433
432
  if not self.is_publishable:
434
433
  raise CannotPublishUnpublishableDocument
435
434
 
436
- ## If it doesn't already have one, get a new FCLID for this document and assign
437
- if len(self.identifiers.of_type(FindCaseLawIdentifier)) < 1:
438
- document_fclid = FindCaseLawIdentifierSchema.mint(self.api_client)
439
- self.identifiers.add(document_fclid)
440
- self.save_identifiers()
441
-
442
435
  publish_documents(uri_for_s3(self.uri))
443
436
  self.api_client.set_published(self.uri, True)
444
437
  announce_document_event(
@@ -537,11 +530,6 @@ class Document:
537
530
  """
538
531
  return self.docx_exists()
539
532
 
540
- def save_identifiers(self) -> None:
541
- """Save the current state of this Document's identifiers to MarkLogic."""
542
- self.identifiers.validate()
543
- self.api_client.set_property_as_node(self.uri, "identifiers", self.identifiers.as_etree)
544
-
545
533
  def __getattr__(self, name: str) -> Any:
546
534
  warnings.warn(f"{name} no longer exists on Document, using Document.body instead", DeprecationWarning)
547
535
  try:
@@ -101,10 +101,6 @@ class Identifier(ABC):
101
101
  def url_slug(self) -> str:
102
102
  return self.schema.compile_identifier_url_slug(self.value)
103
103
 
104
- def same_as(self, other: "Identifier") -> bool:
105
- "Is this the same as another identifier (in value and schema)?"
106
- return self.value == other.value and self.schema == other.schema
107
-
108
104
 
109
105
  class Identifiers(dict[str, Identifier]):
110
106
  def validate(self) -> None:
@@ -113,13 +109,8 @@ class Identifiers(dict[str, Identifier]):
113
109
  msg = "Key of {identifier} in Identifiers is {uuid} not {identifier.uuid}"
114
110
  raise UUIDMismatchError(msg)
115
111
 
116
- def contains(self, other_identifier: Identifier) -> bool:
117
- "Do the identifier's value and namespace already exist in this group?"
118
- return any(other_identifier.same_as(identifier) for identifier in self.values())
119
-
120
112
  def add(self, identifier: Identifier) -> None:
121
- if not self.contains(identifier):
122
- self[identifier.uuid] = identifier
113
+ self[identifier.uuid] = identifier
123
114
 
124
115
  def __delitem__(self, key: Union[Identifier, str]) -> None:
125
116
  if isinstance(key, Identifier):
@@ -127,19 +118,6 @@ class Identifiers(dict[str, Identifier]):
127
118
  else:
128
119
  super().__delitem__(key)
129
120
 
130
- def of_type(self, identifier_type: type[Identifier]) -> list[Identifier]:
131
- """Return a list of all identifiers of a given type."""
132
- uuids = self.keys()
133
- return [self[uuid] for uuid in list(uuids) if isinstance(self[uuid], identifier_type)]
134
-
135
- def delete_type(self, deleted_identifier_type: type[Identifier]) -> None:
136
- "For when we want an identifier to be the only valid identifier of that type, delete the others first"
137
- uuids = self.keys()
138
- for uuid in list(uuids):
139
- # we could use compare to .schema instead, which would have diffferent behaviour for subclasses
140
- if isinstance(self[uuid], deleted_identifier_type):
141
- del self[uuid]
142
-
143
121
  @property
144
122
  def as_etree(self) -> etree._Element:
145
123
  """Return an etree representation of all the Document's identifiers."""
@@ -149,3 +127,8 @@ class Identifiers(dict[str, Identifier]):
149
127
  identifiers_root.append(identifier.as_xml_tree)
150
128
 
151
129
  return identifiers_root
130
+
131
+ def save(self, document) -> None: # type: ignore[no-untyped-def, unused-ignore]
132
+ """Save the current state of this Document's identifiers to MarkLogic."""
133
+ self.validate()
134
+ document.api_client.set_property_as_node(document.uri, "identifiers", self.as_etree)
@@ -3,11 +3,9 @@ from typing import Optional
3
3
  from lxml import etree
4
4
 
5
5
  from . import IDENTIFIER_UNPACKABLE_ATTRIBUTES, Identifier, Identifiers, InvalidIdentifierXMLRepresentationException
6
- from .fclid import FindCaseLawIdentifier
7
6
  from .neutral_citation import NeutralCitationNumber
8
7
 
9
8
  IDENTIFIER_NAMESPACE_MAP: dict[str, type[Identifier]] = {
10
- "fclid": FindCaseLawIdentifier,
11
9
  "ukncn": NeutralCitationNumber,
12
10
  }
13
11
 
@@ -141,12 +141,6 @@ class ListJudgmentVersionsDict(MarkLogicAPIDict):
141
141
  uri: MarkLogicDocumentURIString
142
142
 
143
143
 
144
- # resolve_from_identifier.xqy
145
- class ResolveFromIdentifierDict(MarkLogicAPIDict):
146
- identifier_uri: DocumentURIString
147
- published_only: Optional[int]
148
-
149
-
150
144
  # set_boolean_property.xqy
151
145
  class SetBooleanPropertyDict(MarkLogicAPIDict):
152
146
  name: str
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ds-caselaw-marklogic-api-client
3
- Version: 28.2.0
3
+ Version: 29.0.0a2
4
4
  Summary: An API client for interacting with the underlying data in Find Caselaw.
5
5
  Home-page: https://github.com/nationalarchives/ds-caselaw-custom-api-client
6
6
  Keywords: national archives,caselaw
@@ -11,7 +11,7 @@ Classifier: Programming Language :: Python :: 3.9
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
13
  Requires-Dist: boto3 (>=1.26.112,<2.0.0)
14
- Requires-Dist: certifi (>=2024.12.14,<2024.13.0)
14
+ Requires-Dist: certifi (>=2024.8.30,<2024.9.0)
15
15
  Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
16
16
  Requires-Dist: django-environ (>=0.11.0,<0.12.0)
17
17
  Requires-Dist: ds-caselaw-utils (>=2.0.0,<3.0.0)
@@ -25,7 +25,6 @@ Requires-Dist: pytz (>=2024.1,<2025.0)
25
25
  Requires-Dist: requests (>=2.28.2,<3.0.0)
26
26
  Requires-Dist: requests-toolbelt (>=0.10.1,<1.1.0)
27
27
  Requires-Dist: saxonche (>=12.5.0,<13.0.0)
28
- Requires-Dist: sqids (>=0.5.0,<0.6.0)
29
28
  Requires-Dist: typing-extensions (>=4.7.1,<5.0.0)
30
29
  Description-Content-Type: text/markdown
31
30
 
@@ -1,22 +1,20 @@
1
- caselawclient/Client.py,sha256=6BaVpKSvMcNhSaDXzTkN1h4iV7KFrY7SgSHNGqGQ_2o,43863
1
+ caselawclient/Client.py,sha256=BD6XvObJHILh8C4NvLJUpkEUJ6KpxHOHKokJOrYWaKY,42818
2
2
  caselawclient/__init__.py,sha256=DY-caubLDQWWingSdsBWgovDNXh8KcnkI6kwz08eIFk,612
3
3
  caselawclient/client_helpers/__init__.py,sha256=fyDNKCdrTb2N0Ks23YDhmvlXKfLTHnYQCXhnZb-QQbg,3832
4
4
  caselawclient/client_helpers/search_helpers.py,sha256=R99HyRLeYHgsw2L3DOidEqlKLLvs6Tga5rKTuWQViig,1525
5
5
  caselawclient/content_hash.py,sha256=0cPC4OoABq0SC2wYFX9-24DodNigeOqksDxgxQH_hUA,2221
6
6
  caselawclient/errors.py,sha256=tV0vs3wYSd331BzmfuRiZV6GAdsd91rtN65ymRaSx3s,3164
7
7
  caselawclient/factories.py,sha256=6-xZMVmvtXA8AnyWJgJTums1EWfM6lPIhrWQu0NopJo,4472
8
- caselawclient/identifier_resolution.py,sha256=IOqrZcIHoHhNOCAkNveOBcWddBNpkOB8cz1r0zFa8mQ,1829
9
8
  caselawclient/models/__init__.py,sha256=kd23EUpvaC7aLHdgk8farqKAQEx3lf7RvNT2jEatvlg,68
10
- caselawclient/models/documents/__init__.py,sha256=DS2jqj4ShRHxAWIqQeeyCCcXctLor4xUvT3sc903D2E,19186
9
+ caselawclient/models/documents/__init__.py,sha256=ZrdtHZ1M8kTODD2Zky6EdUjj8r0KQKIZ6nWtXmuZKuo,18506
11
10
  caselawclient/models/documents/body.py,sha256=mtdjmG1WU2qSpyRLS8-PWcSoXpDa2Qz6xlcTbxZgxvA,5603
12
11
  caselawclient/models/documents/exceptions.py,sha256=rw1xId16vBKvBImgFmFUpeFgKqU7VTNtVLIEVBPGKyk,374
13
12
  caselawclient/models/documents/statuses.py,sha256=Cp4dTQmJOtsU41EJcxy5dV1841pGD2PNWH0VrkDEv4Q,579
14
13
  caselawclient/models/documents/transforms/html.xsl,sha256=oSSO-IBX4qLiSWexQYmWJfGNevF09aCBx4D1NYqXxpo,38322
15
14
  caselawclient/models/documents/xml.py,sha256=HlmPb63lLMnySSOLP4iexcAyQiLByKBZtTd25f8sY8M,1268
16
- caselawclient/models/identifiers/__init__.py,sha256=CxKu5iZqPb6BOjrQ3upkzyjU3YSsJhBZd7VATYxDI-A,5061
17
- caselawclient/models/identifiers/fclid.py,sha256=pAxZKKlKRSHwJqrEHOSlbuCt3gBOgS3sNv98bBjuNBc,1295
15
+ caselawclient/models/identifiers/__init__.py,sha256=FSrhDXMkjgaYnOlBaTDcz9ouzL9ERxEhlqud0fdVT8A,4151
18
16
  caselawclient/models/identifiers/neutral_citation.py,sha256=yddlfumdnkrNpoTIOf8dB1foA7hE41-zmlfa17-Ulug,1790
19
- caselawclient/models/identifiers/unpacker.py,sha256=01gWWlOd_2PxT1GJzOIXbp0G4iGGIxHaTaJMBOQ1TDs,1834
17
+ caselawclient/models/identifiers/unpacker.py,sha256=V79m4rd0FZx5TRueL1m3MbrSUWO8c0f5NoNxSP3dlFw,1757
20
18
  caselawclient/models/judgments.py,sha256=NVOg4ZTU7Jtr33UuswL2TXCaN6_W0fKFPK4EdQ-jUhE,1915
21
19
  caselawclient/models/neutral_citation_mixin.py,sha256=5ktKCPIDidVRwxVTzx5e242O1BxOdP--1dnatZyTbYI,1773
22
20
  caselawclient/models/press_summaries.py,sha256=06flQ8wSLnNxoQtXO0ckmotFKszYZcub0oPcDzYbVQw,1879
@@ -46,7 +44,6 @@ caselawclient/xquery/get_judgment.xqy,sha256=8V-sEFKmtpf2LIZD9QKVRfpblEsmDpP4BA6
46
44
  caselawclient/xquery/get_judgment_checkout_status.xqy,sha256=mdY9UXLyzQdB7byEERPqentlr0YDLbXRVqH0h4UuZTQ,193
47
45
  caselawclient/xquery/get_judgment_version.xqy,sha256=wF9k9-CBrqo8VbxxyTrD-AGzR3-3jMm25tRVCjxPLrU,292
48
46
  caselawclient/xquery/get_last_modified.xqy,sha256=8fCm_7o_kkytCEmEeSTLWzLP7iOjuPV01IfHDgf6HaQ,172
49
- caselawclient/xquery/get_next_document_sequence_number.xqy,sha256=LkGoaS7jZfaKDuZLi0apP5qHP1rpcM1HbqX3RUwquKY,450
50
47
  caselawclient/xquery/get_pending_enrichment_for_version.xqy,sha256=8J5Pi-jMXJd_BgtpK4g6C9uR99HP57JpFv5WkoPfNuo,2016
51
48
  caselawclient/xquery/get_pending_parse_for_version.xqy,sha256=9cjVZtHeBBjm-a7RMsn1PVJt_Ug78YFlmp5CN8VJ1jQ,1197
52
49
  caselawclient/xquery/get_properties_for_search_results.xqy,sha256=Tlv3EKwVV_q-JyQyhjWVHIleicPDpucxP4ScuQjpgSw,625
@@ -58,7 +55,6 @@ caselawclient/xquery/get_version_annotation.xqy,sha256=pFDMGA9SxI59iUPaoAeUsq23k
58
55
  caselawclient/xquery/get_version_created.xqy,sha256=bRweaXFtwMBNzL16SlOdiOxHkbqNUwpwDHLxpZYVCh0,250
59
56
  caselawclient/xquery/insert_document.xqy,sha256=iP2xTaLGa-u6X9KfS1yJ6yPCKQUWQFYdEW1S4YcMY7w,531
60
57
  caselawclient/xquery/list_judgment_versions.xqy,sha256=WShga8igeD21hSLfVSvCOiDMPDhNH6KGf1OW6G0SAkY,190
61
- caselawclient/xquery/resolve_from_identifier.xqy,sha256=Fa-RSw9ZwD__BmT5LLJ0J0HcDstDbedkEccv45M3L4g,484
62
58
  caselawclient/xquery/set_boolean_property.xqy,sha256=8Vg3yDWqeDynUJQHw2OF4daDIKTnp8ARol1_OCqY0Dk,355
63
59
  caselawclient/xquery/set_metadata_citation.xqy,sha256=ImwijXowvOCiH_br_LepnKsEpys9tg4Cf3uz6MoC5-c,659
64
60
  caselawclient/xquery/set_metadata_court.xqy,sha256=xQGR3e4pdJuDPMlzdAdzrBDSeQbEFiLVIm2z_KQI_Ds,996
@@ -76,8 +72,8 @@ caselawclient/xquery/validate_all_documents.xqy,sha256=z_0YEXmRcZ-FaJM0ouKiTjdI4
76
72
  caselawclient/xquery/validate_document.xqy,sha256=PgaDcnqCRJPIVqfmWsNlXmCLNKd21qkJrvY1RtNP7eA,140
77
73
  caselawclient/xquery/xslt.xqy,sha256=w57wNijH3dkwHkpKeAxqjlghVflQwo8cq6jS_sm-erM,199
78
74
  caselawclient/xquery/xslt_transform.xqy,sha256=smyFFxqmtkuOzBd2l7uw6K2oAsYctudrP8omdv_XNAM,2463
79
- caselawclient/xquery_type_dicts.py,sha256=kybL-YzwK34Fr6MeWfqVOJHYrs0ZNeDWXDsp8o2Yb1U,6114
80
- ds_caselaw_marklogic_api_client-28.2.0.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
81
- ds_caselaw_marklogic_api_client-28.2.0.dist-info/METADATA,sha256=i7v85V8SC6lm8oYboo4aGxSmVVaO4QZwj3WVSWDU5Cs,4272
82
- ds_caselaw_marklogic_api_client-28.2.0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
83
- ds_caselaw_marklogic_api_client-28.2.0.dist-info/RECORD,,
75
+ caselawclient/xquery_type_dicts.py,sha256=TN-4jPUsus22yBUM6_e4ZaLvYZk_qIhz2gU3Eqg0aaY,5959
76
+ ds_caselaw_marklogic_api_client-29.0.0a2.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
77
+ ds_caselaw_marklogic_api_client-29.0.0a2.dist-info/METADATA,sha256=Tlh3W1mHe_3Th1A3U_TnBhxa64njIGMH6jot3Y_O6vE,4234
78
+ ds_caselaw_marklogic_api_client-29.0.0a2.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
79
+ ds_caselaw_marklogic_api_client-29.0.0a2.dist-info/RECORD,,
@@ -1,43 +0,0 @@
1
- import json
2
- from typing import NamedTuple
3
-
4
- from caselawclient.models.documents import DocumentURIString
5
- from caselawclient.xquery_type_dicts import MarkLogicDocumentURIString
6
-
7
-
8
- class IdentifierResolutions(list["IdentifierResolution"]):
9
- """
10
- A list of candidate MarkLogic documents which correspond to a Public UI uri
11
-
12
- MarkLogic returns a list of dictionaries; IdentifierResolution handles a single dictionary
13
- which corresponds to a single identifier to MarkLogic document mapping.
14
-
15
- see `xquery/resolve_from_identifier.xqy` and `resolve_from_identifier` in `Client.py`
16
- """
17
-
18
- @staticmethod
19
- def from_marklogic_output(table: list[str]) -> "IdentifierResolutions":
20
- return IdentifierResolutions(list(IdentifierResolution.from_marklogic_output(row) for row in table))
21
-
22
- def published(self) -> "IdentifierResolutions":
23
- "Filter the list so that only published documents are returned"
24
- return IdentifierResolutions(list(x for x in self if x.document_published))
25
-
26
-
27
- class IdentifierResolution(NamedTuple):
28
- """A single response from MarkLogic about a single identifier / document mapping"""
29
-
30
- identifier_uuid: str
31
- document_uri: MarkLogicDocumentURIString
32
- identifier_slug: DocumentURIString
33
- document_published: bool
34
-
35
- @staticmethod
36
- def from_marklogic_output(raw_row: str) -> "IdentifierResolution":
37
- row = json.loads(raw_row)
38
- return IdentifierResolution(
39
- identifier_uuid=row["documents.compiled_url_slugs.identifier_uuid"],
40
- document_uri=MarkLogicDocumentURIString(row["documents.compiled_url_slugs.document_uri"]),
41
- identifier_slug=DocumentURIString(row["documents.compiled_url_slugs.identifier_slug"]),
42
- document_published=row["documents.compiled_url_slugs.document_published"] == "true",
43
- )
@@ -1,48 +0,0 @@
1
- import re
2
- from typing import TYPE_CHECKING
3
-
4
- from sqids import Sqids
5
-
6
- from . import Identifier, IdentifierSchema
7
-
8
- if TYPE_CHECKING:
9
- from caselawclient.Client import MarklogicApiClient
10
-
11
-
12
- VALID_FCLID_PATTERN = re.compile(r"^[bcdfghjkmnpqrstvwxyz23456789]{4,}$")
13
-
14
- FCLID_MINIMUM_LENGTH = 8
15
- FCLID_ALPHABET = "bcdfghjkmnpqrstvwxyz23456789"
16
-
17
- sqids = Sqids(
18
- min_length=FCLID_MINIMUM_LENGTH,
19
- alphabet=FCLID_ALPHABET,
20
- )
21
-
22
-
23
- class FindCaseLawIdentifierSchema(IdentifierSchema):
24
- """
25
- Identifier schema describing a Find Case Law Identifier.
26
- """
27
-
28
- name = "Find Case Law Identifier"
29
- namespace = "fclid"
30
-
31
- @classmethod
32
- def validate_identifier(cls, value: str) -> bool:
33
- return bool(VALID_FCLID_PATTERN.match(value))
34
-
35
- @classmethod
36
- def compile_identifier_url_slug(cls, value: str) -> str:
37
- return "tna." + value
38
-
39
- @classmethod
40
- def mint(cls, api_client: "MarklogicApiClient") -> "FindCaseLawIdentifier":
41
- """Generate a totally new Find Case Law identifier."""
42
- next_sequence_number = api_client.get_next_document_sequence_number()
43
- new_identifier = sqids.encode([next_sequence_number])
44
- return FindCaseLawIdentifier(value=new_identifier)
45
-
46
-
47
- class FindCaseLawIdentifier(Identifier):
48
- schema = FindCaseLawIdentifierSchema
@@ -1,14 +0,0 @@
1
- xquery version "1.0-ml";
2
- declare option xdmp:transaction-mode "update";
3
-
4
- let $_ := xdmp:set-transaction-mode("update")
5
- let $state_doc := fn:doc("state.xml")
6
- let $counter_node := $state_doc/state/document_counter
7
-
8
- let $current_counter := $counter_node/text()
9
- let $new_counter := fn:sum(($current_counter, 1))
10
-
11
- let $_ := xdmp:node-replace($counter_node, <document_counter>{$new_counter}</document_counter>)
12
- let $_ := xdmp:commit()
13
-
14
- return $new_counter
@@ -1,17 +0,0 @@
1
- xquery version "1.0-ml";
2
-
3
- declare namespace xdmp="http://marklogic.com/xdmp";
4
- declare variable $identifier_uri as xs:string external;
5
- declare variable $published_only as xs:int? external := 1;
6
-
7
- let $published_query := if ($published_only) then " AND document_published = 'true'" else ""
8
- let $query := "SELECT * from compiled_url_slugs WHERE (identifier_slug = @uri)" || $published_query
9
-
10
- return xdmp:sql(
11
- $query,
12
- "map",
13
- map:new((
14
- map:entry("uri", $identifier_uri)
15
- ))
16
- )
17
-