ds-caselaw-marklogic-api-client 43.1.0__py3-none-any.whl → 44.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
caselawclient/Client.py CHANGED
@@ -1222,13 +1222,13 @@ class MarklogicApiClient:
1222
1222
 
1223
1223
  return (int(table[1][1]), int(table[1][2]))
1224
1224
 
1225
- def get_pending_parse_for_version(
1225
+ def get_documents_pending_parse_for_version(
1226
1226
  self,
1227
1227
  target_version: tuple[int, int],
1228
1228
  maximum_records: int = 1000,
1229
1229
  ) -> list[list[Any]]:
1230
- """Retrieve documents which are not yet parsed with a given version."""
1231
- vars: query_dicts.GetPendingParseForVersionDict = {
1230
+ """Retrieve a list of documents which are not yet parsed with a given version."""
1231
+ vars: query_dicts.GetPendingParseForVersionDocumentsDict = {
1232
1232
  "target_major_version": target_version[0],
1233
1233
  "target_minor_version": target_version[1],
1234
1234
  "maximum_records": maximum_records,
@@ -1237,13 +1237,33 @@ class MarklogicApiClient:
1237
1237
  get_single_string_from_marklogic_response(
1238
1238
  self._send_to_eval(
1239
1239
  vars,
1240
- "get_pending_parse_for_version.xqy",
1240
+ "get_pending_parse_for_version_documents.xqy",
1241
1241
  ),
1242
1242
  ),
1243
1243
  )
1244
1244
 
1245
1245
  return results
1246
1246
 
1247
+ def get_count_pending_parse_for_version(
1248
+ self,
1249
+ target_version: tuple[int, int],  # indexed below as (major, minor)
1250
+ ) -> int:
1251
+ """Get the total number of documents which are not yet parsed with a given version. Evaluates get_pending_parse_for_version_count.xqy with the target major/minor version and returns the count it reports as an int."""
1252
+ vars: query_dicts.GetPendingParseForVersionCountDict = {
1253
+ "target_major_version": target_version[0],
1254
+ "target_minor_version": target_version[1],
1255
+ }
1256
+ results = json.loads(  # the eval response is reduced to a single string, which is then decoded as JSON
1257
+ get_single_string_from_marklogic_response(
1258
+ self._send_to_eval(
1259
+ vars,
1260
+ "get_pending_parse_for_version_count.xqy",
1261
+ ),
1262
+ ),
1263
+ )
1264
+
1265
+ return int(results[1][0])  # NOTE(review): presumably row 0 holds the column names and row 1 the single COUNT(*) row — confirm against the xdmp:sql "array" output format
1266
+
1247
1267
  def get_recently_parsed(
1248
1268
  self,
1249
1269
  ) -> list[list[Any]]:
@@ -20,7 +20,7 @@ T = TypeVar("T")
20
20
 
21
21
  DEFAULT_DOCUMENT_BODY_XML = """<akomaNtoso xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0" xmlns:uk="https://caselaw.nationalarchives.gov.uk/akn">
22
22
  <judgment name="decision">
23
- <meta/><header/>
23
+ <meta/><header><p>Header contains text</p></header>
24
24
  <judgmentBody>
25
25
  <decision>
26
26
  <p>This is a document.</p>
@@ -26,6 +26,7 @@ from caselawclient.models.utilities import VersionsDict, extract_version, render
26
26
  from caselawclient.models.utilities.aws import (
27
27
  ParserInstructionsDict,
28
28
  announce_document_event,
29
+ are_unpublished_assets_clean,
29
30
  check_docx_exists,
30
31
  delete_documents_from_private_bucket,
31
32
  generate_docx_url,
@@ -102,6 +103,11 @@ class Document:
102
103
  True,
103
104
  "There is another document with identical content",
104
105
  ),
106
+ (
107
+ "has_only_clean_assets",
108
+ True,
109
+ "An uncleaned asset exists for this document",
110
+ ),
105
111
  ]
106
112
  """
107
113
  A list of tuples in the form:
@@ -252,7 +258,7 @@ class Document:
252
258
  """
253
259
  if self.is_version:
254
260
  raise NotSupportedOnVersion(
255
- "Cannot get versions of a version for {self.uri}",
261
+ f"Cannot get versions of a version for {self.uri}",
256
262
  )
257
263
  docs = []
258
264
  for version in self.versions:
@@ -370,6 +376,12 @@ class Document:
370
376
  """Check if the content hash of this document is unique compared to all other documents in MarkLogic."""
371
377
  return self.api_client.has_unique_content_hash(self.uri)
372
378
 
379
+ @cached_property
380
+ def has_only_clean_assets(self) -> bool:
381
+ """False if any non-tar.gz assets associated with this document have not been cleaned. Currently always True: the unconditional return below short-circuits the real check."""
382
+ return True # TODO: Remove this once we have enabled the asset cleaning pipeline. Until then the following line is intentionally unreachable.
383
+ return are_unpublished_assets_clean(self.uri)  # the intended check, passing this document's URI to the helper imported from caselawclient.models.utilities.aws
384
+
373
385
  @cached_property
374
386
  def version_created_datetime(self) -> datetime.datetime:
375
387
  return self.api_client.get_version_created_datetime(self.uri)
@@ -177,14 +177,9 @@ class DocumentBody:
177
177
  def has_content(self) -> bool:
178
178
  """If we do not have a word document, the XML will not contain
179
179
  the contents of the judgment, but will contain a preamble."""
180
- trailing_tags = self._xml.xml_as_tree.xpath("//*[preceding::akn:meta]", namespaces=DEFAULT_NAMESPACES)
181
- for tag in trailing_tags:
182
- if tag.tail and tag.tail.strip():
183
- return True
184
- if tag.text and tag.text.strip():
185
- return True
186
-
187
- return False
180
+ return bool(  # True when at least one akn:header element has a non-whitespace string value (its own text plus that of its descendants)
181
+ self._xml.xml_as_tree.xpath("//akn:header[normalize-space(string(.))]", namespaces=DEFAULT_NAMESPACES)
182
+ )
188
183
 
189
184
  @cached_property
190
185
  def has_external_data(self) -> bool:
@@ -11,7 +11,7 @@ declare variable $version_uri as xs:string? external;
11
11
  declare variable $search_query as xs:string? external;
12
12
 
13
13
  (: Note that `xsl:output method` is changed from `html` to `xml` and we've namespaced the tag :)
14
- let $number_marks_xslt := (
14
+ let $delete_meta_marks_xslt := (
15
15
  <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
16
16
  xmlns:uk="https://caselaw.nationalarchives.gov.uk/akn"
17
17
  xmlns:akn="http://docs.oasis-open.org/legaldocml/ns/akn/3.0"
@@ -25,16 +25,6 @@ let $number_marks_xslt := (
25
25
  <xsl:template match="//akn:meta//uk:mark">
26
26
  <xsl:apply-templates />
27
27
  </xsl:template>
28
- <xsl:template match="uk:mark">
29
- <xsl:copy>
30
- <xsl:copy-of select="@*" />
31
- <xsl:attribute name="id">
32
- <xsl:text>mark_</xsl:text>
33
- <xsl:number count="//uk:mark" level="any" from="//*[ancestor::akn:meta]" />
34
- </xsl:attribute>
35
- <xsl:apply-templates />
36
- </xsl:copy>
37
- </xsl:template>
38
28
  </xsl:stylesheet>
39
29
  )
40
30
 
@@ -56,7 +46,7 @@ let $raw_xml := if ($show_unpublished) then
56
46
  (: If a search query string is present, highlight instances :)
57
47
  let $transformed := if($search_query) then
58
48
  xdmp:xslt-eval(
59
- $number_marks_xslt,
49
+ $delete_meta_marks_xslt,
60
50
  cts:highlight(
61
51
  $raw_xml,
62
52
  helper:make-q-query($search_query),
@@ -0,0 +1,26 @@
1
+ xquery version "1.0-ml";
2
+ (: Counts rows of documents.process_data joined to documents.process_property_data on uri whose parser version is missing or below the target, and whose last parse request is either absent or more than 43200 minutes, i.e. 30 days, old. The xdmp:sql result is returned as JSON in "array" format. :)
3
+ declare variable $target_major_version as xs:int external;
4
+ declare variable $target_minor_version as xs:int external;
5
+ (: NOTE(review): the version predicate below requires minor < target minor even when major is strictly lower than the target, so a document parsed at 1.9 is not counted for target 2.0. Confirm whether "major < target OR [major = target AND minor < target minor]" was intended. get_pending_parse_for_version_documents.xqy uses the same predicate, so any change should be made to both together. :)
6
+ xdmp:to-json(xdmp:sql(
7
+ "SELECT COUNT(*) as count
8
+ FROM (
9
+ SELECT
10
+ process_data.uri,
11
+ parser_version_string, parser_major_version, parser_minor_version,
12
+ DATEDIFF('minute', last_sent_to_parser, CURRENT_TIMESTAMP) AS minutes_since_parse_request
13
+ FROM documents.process_data
14
+ JOIN documents.process_property_data ON process_data.uri = process_property_data.uri
15
+ )
16
+ WHERE (
17
+ (parser_version_string IS NULL) OR
18
+ (parser_major_version <= @target_major_version AND parser_minor_version < @target_minor_version)
19
+ )
20
+ AND (minutes_since_parse_request > 43200 OR minutes_since_parse_request IS NULL)",
21
+ "array",
22
+ map:new((
23
+ map:entry("target_major_version", $target_major_version),
24
+ map:entry("target_minor_version", $target_minor_version)
25
+ ))
26
+ ))
@@ -19,7 +19,7 @@ xdmp:to-json(xdmp:sql(
19
19
  (parser_major_version <= @target_major_version AND parser_minor_version < @target_minor_version)
20
20
  )
21
21
  AND (minutes_since_parse_request > 43200 OR minutes_since_parse_request IS NULL)
22
- ORDER BY parser_major_version ASC NULLS FIRST, parser_minor_version ASC
22
+ ORDER BY parser_major_version ASC NULLS FIRST, parser_minor_version ASC, minutes_since_parse_request DESC
23
23
  LIMIT @maximum_records",
24
24
  "array",
25
25
  map:new((
@@ -28,4 +28,4 @@ xdmp:to-json(xdmp:sql(
28
28
  map:entry("maximum_records", $maximum_records)
29
29
 
30
30
  ))
31
- ))
31
+ ))
@@ -17,27 +17,6 @@ let $xsl_path := fn:concat("judgments/xslts/", $xsl_filename)
17
17
 
18
18
  let $params := map:map()
19
19
 
20
- let $number_marks_xslt := (
21
- <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
22
- version="2.0">
23
- <xsl:output method="html" />
24
- <xsl:template match="@*|node()">
25
- <xsl:copy>
26
- <xsl:apply-templates select="@*|node()"/>
27
- </xsl:copy>
28
- </xsl:template>
29
- <xsl:template match="mark">
30
- <xsl:copy>
31
- <xsl:copy-of select="*" />
32
- <xsl:attribute name="id">
33
- <xsl:text>mark_</xsl:text>
34
- <xsl:value-of select="count(preceding::mark)"/>
35
- </xsl:attribute>
36
- <xsl:apply-templates />
37
- </xsl:copy>
38
- </xsl:template>
39
- </xsl:stylesheet>
40
- )
41
20
  (: change the image-base of the document to match the location of the assets in $image_base
42
21
  so that references to images point to the correct places on the internet :)
43
22
  let $_put := map:put(
@@ -59,13 +38,10 @@ let $retrieved_value := if (xs:boolean($is_published) or $show_unpublished) then
59
38
  ()
60
39
 
61
40
  let $return_value := if($query) then
62
- xdmp:xslt-eval(
63
- $number_marks_xslt,
64
- cts:highlight(
65
- $retrieved_value,
66
- helper:make-q-query($query),
67
- <mark>{$cts:text}</mark>
68
- )
41
+ cts:highlight(
42
+ $retrieved_value,
43
+ helper:make-q-query($query),
44
+ <mark>{$cts:text}</mark>
69
45
  )
70
46
  else
71
47
  $retrieved_value
@@ -105,8 +105,14 @@ class GetPendingEnrichmentForVersionDict(MarkLogicAPIDict):
105
105
  target_parser_minor_version: int
106
106
 
107
107
 
108
- # get_pending_parse_for_version.xqy
109
- class GetPendingParseForVersionDict(MarkLogicAPIDict):
108
+ # get_pending_parse_for_version_count.xqy — variables passed to the count query; it declares both as xs:int external
109
+ class GetPendingParseForVersionCountDict(MarkLogicAPIDict):
110
+ target_major_version: int  # bound to $target_major_version
111
+ target_minor_version: int  # bound to $target_minor_version
112
+
113
+
114
+ # get_pending_parse_for_version_documents.xqy
115
+ class GetPendingParseForVersionDocumentsDict(MarkLogicAPIDict):
110
116
  maximum_records: Optional[int]
111
117
  target_major_version: int
112
118
  target_minor_version: int
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ds-caselaw-marklogic-api-client
3
- Version: 43.1.0
3
+ Version: 44.0.3
4
4
  Summary: An API client for interacting with the underlying data in Find Caselaw.
5
5
  Keywords: national archives,caselaw
6
6
  Author: The National Archives
@@ -9,7 +9,7 @@ Classifier: Programming Language :: Python :: 3
9
9
  Classifier: Programming Language :: Python :: 3.12
10
10
  Classifier: Programming Language :: Python :: 3.13
11
11
  Requires-Dist: boto3 (>=1.26.112,<2.0.0)
12
- Requires-Dist: certifi (>=2025.10.5,<2025.11.0)
12
+ Requires-Dist: certifi (>=2025.11.12,<2025.12.0)
13
13
  Requires-Dist: charset-normalizer (>=3.0.0,<4.0.0)
14
14
  Requires-Dist: defusedxml (>=0.7.1,<0.8.0)
15
15
  Requires-Dist: django-environ (>=0.12.0)
@@ -1,17 +1,17 @@
1
- caselawclient/Client.py,sha256=OZ7ee3QVvN59go2oHxndgRgAKSMaVAn-mmG7E9rpPgg,47415
1
+ caselawclient/Client.py,sha256=aKe_SvSXpSIgHCc53DfQY6mgK4QyrUDqyPWLByzGxLI,48138
2
2
  caselawclient/__init__.py,sha256=QZtsOB_GR5XfFnWMJ6E9_fBany-JXFIrQmzs1mD_KVg,1225
3
3
  caselawclient/client_helpers/__init__.py,sha256=tpXWjwBAqOf8ChtSiEeMhdkiO7tVbfZ4FfQXsXaGJlI,1221
4
4
  caselawclient/client_helpers/search_helpers.py,sha256=R99HyRLeYHgsw2L3DOidEqlKLLvs6Tga5rKTuWQViig,1525
5
5
  caselawclient/content_hash.py,sha256=0cPC4OoABq0SC2wYFX9-24DodNigeOqksDxgxQH_hUA,2221
6
6
  caselawclient/errors.py,sha256=JC16fEGq_MRJX-_KFzfINCV2Cqx8o6OWOt3C16rQd84,3142
7
- caselawclient/factories.py,sha256=HXJeWpN7__X462joco07ziNqMOOMr-wUPJ91Y69gFk8,7466
7
+ caselawclient/factories.py,sha256=5AiRrvtnvCkvHLU9SzD9MRZPKspEomFw9M2LDZjveJY,7501
8
8
  caselawclient/identifier_resolution.py,sha256=B5I1sD7o7YjzsXMECjbKjgiGLDda5bGhejsJ-lYpTIg,2429
9
9
  caselawclient/managers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  caselawclient/managers/merge/__init__.py,sha256=Rd6YIGifT3TP6UOf0gBrRoYzK5MJqTPeRaIJznS8dgI,2078
11
11
  caselawclient/managers/merge/checks.py,sha256=J9RBG6jZAKIZk27jdFq-BByoRLKWsViCfHDyA8ZM3qU,3205
12
12
  caselawclient/models/__init__.py,sha256=kd23EUpvaC7aLHdgk8farqKAQEx3lf7RvNT2jEatvlg,68
13
- caselawclient/models/documents/__init__.py,sha256=6OBZfMy_6FgLQ6ZnMvjKv1Poiq-UP7pb2TcxHx-uSTI,24705
14
- caselawclient/models/documents/body.py,sha256=pzk3bm9FGIWfI0Hs8dBuzk6RCiA9M4GHfgOYKpNlzyE,8455
13
+ caselawclient/models/documents/__init__.py,sha256=nC1CoYUDVAlAgLT4vWdqmNj3yrpoUs7C73H588_PTSA,25185
14
+ caselawclient/models/documents/body.py,sha256=IGRJcolJlkW_KWufNy4lLB97uPf9Ghhn6i_s0sptUrk,8286
15
15
  caselawclient/models/documents/comparison.py,sha256=KwFZQByOcYcZKe8csjAntttACKq4BZb28n2VeV5rK54,1355
16
16
  caselawclient/models/documents/exceptions.py,sha256=te7PPQTDHjZ9EYVg5pVaiZfF00lMBFy333PHj8_mkC4,443
17
17
  caselawclient/models/documents/statuses.py,sha256=Cp4dTQmJOtsU41EJcxy5dV1841pGD2PNWH0VrkDEv4Q,579
@@ -53,14 +53,15 @@ caselawclient/xquery/get_combined_stats_table.xqy,sha256=cclNqSzIB6sX6A_hgVOFZon
53
53
  caselawclient/xquery/get_components_for_document.xqy,sha256=qBOn5OI7ThK0OHizSm68oySfAdp2dsHFJaIMTI4iIC0,769
54
54
  caselawclient/xquery/get_highest_enrichment_version.xqy,sha256=a0dwVmEZuIMyRjIlvenSmbOaaN0WvgaCZvMtVWoLulQ,247
55
55
  caselawclient/xquery/get_highest_parser_version.xqy,sha256=LW3iSg4eWArbfBaCVWWOpr4MoUcDBz514nV48ElOsAM,247
56
- caselawclient/xquery/get_judgment.xqy,sha256=34fMHSBNPcXY3u3QIX3Bul8DMuWyGGHrhuYTzxGt3wk,2420
56
+ caselawclient/xquery/get_judgment.xqy,sha256=YMDDTOvT2FaYXqhYPcJVNn95czDoDojauVJiSj1bVG0,2074
57
57
  caselawclient/xquery/get_judgment_checkout_status.xqy,sha256=mdY9UXLyzQdB7byEERPqentlr0YDLbXRVqH0h4UuZTQ,193
58
58
  caselawclient/xquery/get_judgment_version.xqy,sha256=wF9k9-CBrqo8VbxxyTrD-AGzR3-3jMm25tRVCjxPLrU,292
59
59
  caselawclient/xquery/get_last_modified.xqy,sha256=8fCm_7o_kkytCEmEeSTLWzLP7iOjuPV01IfHDgf6HaQ,172
60
60
  caselawclient/xquery/get_missing_fclid.xqy,sha256=FAZZMtqow0VAf1D9LjBydT9kcOxiEIQC7GQgs4o68sA,520
61
61
  caselawclient/xquery/get_next_document_sequence_number.xqy,sha256=LkGoaS7jZfaKDuZLi0apP5qHP1rpcM1HbqX3RUwquKY,450
62
62
  caselawclient/xquery/get_pending_enrichment_for_version.xqy,sha256=8J5Pi-jMXJd_BgtpK4g6C9uR99HP57JpFv5WkoPfNuo,2016
63
- caselawclient/xquery/get_pending_parse_for_version.xqy,sha256=9cjVZtHeBBjm-a7RMsn1PVJt_Ug78YFlmp5CN8VJ1jQ,1197
63
+ caselawclient/xquery/get_pending_parse_for_version_count.xqy,sha256=8OrpwjfW3daG1wPox9Xyc5A6kqRfKFQZ8ogTqB8VIPA,928
64
+ caselawclient/xquery/get_pending_parse_for_version_documents.xqy,sha256=g5r5Lnyi3r7eNOnX0eIYRTVBZjPd43KQRyrIAS8pIZY,1232
64
65
  caselawclient/xquery/get_properties_for_search_results.xqy,sha256=Tlv3EKwVV_q-JyQyhjWVHIleicPDpucxP4ScuQjpgSw,625
65
66
  caselawclient/xquery/get_property.xqy,sha256=RHlOTrK0aH-S7s_ykYzGmUeKOJxXlI4vE5sKRt556NY,209
66
67
  caselawclient/xquery/get_property_as_node.xqy,sha256=7EXNgjVD1QugJ1621pvg8PdjBRIuh7GugwARv04TuBk,202
@@ -89,11 +90,11 @@ caselawclient/xquery/user_has_role.xqy,sha256=52YuFZnXqaDDJs-j_UanpqcLNEiw_m9xb0
89
90
  caselawclient/xquery/validate_all_documents.xqy,sha256=z_0YEXmRcZ-FaJM0ouKiTjdI4tLKQ4FTssRihR07qFk,156
90
91
  caselawclient/xquery/validate_document.xqy,sha256=PgaDcnqCRJPIVqfmWsNlXmCLNKd21qkJrvY1RtNP7eA,140
91
92
  caselawclient/xquery/xslt.xqy,sha256=w57wNijH3dkwHkpKeAxqjlghVflQwo8cq6jS_sm-erM,199
92
- caselawclient/xquery/xslt_transform.xqy,sha256=cccaFiGkCcvSfDv007UriZ3I4ak2nTLP1trRZdbOoS8,2462
93
- caselawclient/xquery_type_dicts.py,sha256=f4PM8yZi5RRMdL2lQ8tsLUs0aJjBa5chvd-VVj40fJY,6767
93
+ caselawclient/xquery/xslt_transform.xqy,sha256=3X8f7u5kRXKRMwnfZ2AO60LS9F3Gi3mFp_MHrw95x0w,1745
94
+ caselawclient/xquery_type_dicts.py,sha256=caNLrQBytQFxfdVs5gpSTQEo-FEldKITZDqZtITKWJQ,6950
94
95
  caselawclient/xslt/modify_xml_live.xsl,sha256=gNjwBun2-UzOeeuf0wNjFtN3jXm1yrwqv_KT8r1slXw,2370
95
96
  caselawclient/xslt/sample.xsl,sha256=IG-v77stjwqiw25pguh391K-5DTKiX651WqILDZixm0,825
96
- ds_caselaw_marklogic_api_client-43.1.0.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
97
- ds_caselaw_marklogic_api_client-43.1.0.dist-info/METADATA,sha256=P-ddB6xJtOQa9vfR5Yl5iFIxXZL2mOgeLRg-YxVQRMY,4408
98
- ds_caselaw_marklogic_api_client-43.1.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
99
- ds_caselaw_marklogic_api_client-43.1.0.dist-info/RECORD,,
97
+ ds_caselaw_marklogic_api_client-44.0.3.dist-info/LICENSE.md,sha256=fGMzyyLuQW-IAXUeDSCrRdsYW536aEWThdbpCjo6ZKg,1108
98
+ ds_caselaw_marklogic_api_client-44.0.3.dist-info/METADATA,sha256=32qweXun6XgSOZhqDF4y-wKtPV9y90LrQsrL-IaSVv8,4409
99
+ ds_caselaw_marklogic_api_client-44.0.3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
100
+ ds_caselaw_marklogic_api_client-44.0.3.dist-info/RECORD,,