hpcflow-new2 0.2.0a221__py3-none-any.whl → 0.2.0a223__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hpcflow/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.2.0a221"
1
+ __version__ = "0.2.0a223"
@@ -18,6 +18,7 @@ from numpy.ma.core import MaskedArray
18
18
  import zarr # type: ignore
19
19
  from zarr.errors import BoundsCheckError # type: ignore
20
20
  from zarr.storage import DirectoryStore, FSStore # type: ignore
21
+ from zarr.util import guess_chunks # type: ignore
21
22
  from fsspec.implementations.zip import ZipFileSystem # type: ignore
22
23
  from rich.console import Console
23
24
  from numcodecs import MsgPack, VLenArray, blosc, Blosc, Zstd # type: ignore
@@ -59,6 +60,7 @@ from hpcflow.sdk.submission.submission import (
59
60
  SUBMISSION_SUBMIT_TIME_KEYS,
60
61
  )
61
62
  from hpcflow.sdk.utils.arrays import get_2D_idx, split_arr
63
+ from hpcflow.sdk.utils.patches import override_module_attrs
62
64
  from hpcflow.sdk.utils.strings import shorten_list_str
63
65
 
64
66
  if TYPE_CHECKING:
@@ -89,6 +91,11 @@ if TYPE_CHECKING:
89
91
  ListAny: TypeAlias = "list[Any]"
90
92
  #: Zarr attribute mapping context.
91
93
  ZarrAttrs: TypeAlias = "dict[str, Any]"
94
+ #: Soft lower limit for the number of bytes in an array chunk
95
+ _ARRAY_CHUNK_MIN: int = 500 * 1024 * 1024 # 500 MiB
96
+ #: Hard upper limit for the number of bytes in an array chunk. Should be lower than the
97
+ #: maximum buffer size of the blosc encoder, if we're using it (2 GiB)
98
+ _ARRAY_CHUNK_MAX: int = 1024 * 1024 * 1024 # 1 GiB
92
99
  _JS: TypeAlias = "dict[str, list[dict[str, dict]]]"
93
100
 
94
101
 
@@ -124,7 +131,13 @@ def _encode_numpy_array(
124
131
  new_idx = (
125
132
  max((int(i.removeprefix("arr_")) for i in param_arr_group.keys()), default=-1) + 1
126
133
  )
127
- param_arr_group.create_dataset(name=f"arr_{new_idx}", data=obj, chunks=obj.shape)
134
+ with override_module_attrs(
135
+ "zarr.util", {"CHUNK_MIN": _ARRAY_CHUNK_MIN, "CHUNK_MAX": _ARRAY_CHUNK_MAX}
136
+ ):
137
+ # `guess_chunks` also ensures chunk shape is at least 1 in each dimension:
138
+ chunk_shape = guess_chunks(obj.shape, obj.dtype.itemsize)
139
+
140
+ param_arr_group.create_dataset(name=f"arr_{new_idx}", data=obj, chunks=chunk_shape)
128
141
  type_lookup["arrays"].append([path, new_idx])
129
142
 
130
143
  return len(type_lookup["arrays"]) - 1
@@ -1,4 +1,7 @@
1
+ from contextlib import contextmanager
1
2
  from pathlib import Path
3
+ import sys
4
+ from typing import Any
2
5
 
3
6
 
4
7
  def resolve_path(path):
@@ -10,3 +13,19 @@ def resolve_path(path):
10
13
  """
11
14
  # TODO: this only seems to be used in a test; remove?
12
15
  return Path.cwd() / Path(path).resolve() # cwd is ignored if already absolute
16
+
17
+
18
@contextmanager
def override_module_attrs(module_name: str, overrides: dict[str, Any]):
    """Temporarily replace module-level attributes for the duration of a `with` block.

    Parameters
    ----------
    module_name
        Key of the target module in `sys.modules`. The module must already be
        imported; otherwise the lookup raises `KeyError`.
    overrides
        Mapping of attribute name to the temporary value to assign. Each name must
        already exist on the module; otherwise `getattr` raises `AttributeError`
        before any attribute has been modified.

    Notes
    -----
    The values held on entry are re-assigned on exit, whether the block finishes
    normally or raises. The override mutates the module object itself, so it is
    visible to all code that reads these attributes from the module while the
    block is active.
    """
    target = sys.modules[module_name]

    # Snapshot every current value up front, so that a missing attribute fails
    # here, before anything on the module has been changed:
    saved = {name: getattr(target, name) for name in overrides}

    try:
        for name, temporary in overrides.items():
            setattr(target, name, temporary)
        yield
    finally:
        # always put the entry-time values back, even if the body raised:
        for name, previous in saved.items():
            setattr(target, name, previous)
@@ -6,7 +6,11 @@ from typing import cast, TYPE_CHECKING
6
6
  import numpy as np
7
7
  import zarr # type: ignore
8
8
  import pytest
9
- from hpcflow.sdk.core.test_utils import make_test_data_YAML_workflow, make_workflow
9
+ from hpcflow.sdk.core.test_utils import (
10
+ make_schemas,
11
+ make_test_data_YAML_workflow,
12
+ make_workflow,
13
+ )
10
14
  from hpcflow.sdk.persistence.json import (
11
15
  JSONPersistentStore,
12
16
  JsonStoreElement,
@@ -551,3 +555,38 @@ def test_zarr_encode_decode_jobscript_block_dependencies_large_one_to_one():
551
555
  arr = ZarrPersistentStore._encode_jobscript_block_dependencies(deps_t)
552
556
  deps_rt = ZarrPersistentStore._decode_jobscript_block_dependencies(arr)
553
557
  assert deps_rt == deps
558
+
559
+
560
@pytest.mark.parametrize(
    "array",
    (
        np.array([]),
        np.empty(0),
        np.empty((0, 1, 2)),
        np.array([1, 2, 3]),
        np.array([[1, 2, 3], [4, 5, 6]]),
    ),
)
def test_zarr_save_persistent_array_shape(null_config, tmp_path, array):
    """Check that an array input keeps its shape when read back from a new workflow.

    The parametrised cases cover zero-length arrays, a zero-sized multi-dimensional
    array, and ordinary 1D and 2D arrays.
    """
    schema = make_schemas(({"p1": None}, ()))
    task = hf.Task(schema=schema, inputs={"p1": array})
    workflow = hf.Workflow.from_template_data(
        template_name="test_save_empty_array",
        tasks=[task],
        path=tmp_path,
    )

    # read the input back from the first element of the first task and
    # materialise it with a full slice before comparing shapes:
    stored = workflow.tasks[0].elements[0].get("inputs.p1")
    assert array.shape == stored[:].shape
579
+
580
+
581
def test_zarr_single_chunk_threshold(null_config, tmp_path):
    """Smoke test: a workflow with a very large array input can be created.

    The test passes if workflow creation completes without raising; it makes no
    assertion about the stored array.
    """
    # very large arrays (> ~1 GB) are intended to be saved using multiple chunks.
    # NOTE(review): the resulting chunk layout is not asserted here -- consider
    # checking it if the stored array's chunking is accessible from the workflow.

    # 268_435_456 zeros of numpy's default float64 dtype occupy
    # 2_147_483_648 bytes (2 GiB); greater than blosc's max buffer size
    n_items = 268_435_456
    big_array = np.zeros(n_items)

    schema = make_schemas(({"p1": None}, ()))
    task = hf.Task(schema=schema, inputs={"p1": big_array})
    wk = hf.Workflow.from_template_data(
        template_name="test_large_array",
        tasks=[task],
        path=tmp_path,
    )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: hpcflow-new2
3
- Version: 0.2.0a221
3
+ Version: 0.2.0a223
4
4
  Summary: Computational workflow management
5
5
  License: MPL-2.0
6
6
  Author: aplowman
@@ -1,7 +1,7 @@
1
1
  hpcflow/__init__.py,sha256=WIETuRHeOp2SqUqHUzpjQ-lk9acbYv-6aWOhZPRdlhs,64
2
2
  hpcflow/__pyinstaller/__init__.py,sha256=YOzBlPSck6slucv6lJM9K80JtsJWxXRL00cv6tRj3oc,98
3
3
  hpcflow/__pyinstaller/hook-hpcflow.py,sha256=P2b-8QdQqkSS7cJB6CB3CudUuJ9iZzTh2fQF4hNdCa4,1118
4
- hpcflow/_version.py,sha256=Y8c0z1l-waBysyZgLJvoHzKQD3sH5a9afvk3S4fdb8A,26
4
+ hpcflow/_version.py,sha256=ExqG_C9ZO0pdvjMEloB9_jw5T23HBOJ1y0ajtaOl5UY,26
5
5
  hpcflow/app.py,sha256=gl2viVS65PbpDhUp2DARaYHFDqDWQjuoyB3ikrCNRW4,1367
6
6
  hpcflow/cli.py,sha256=G2J3D9v6MnMWOWMMWK6UEKLn_6wnV9lT_qygEBBxg-I,66
7
7
  hpcflow/data/demo_data_manifest/__init__.py,sha256=Hsq0jT8EXM13wu1MpGy5FQgyuz56ygep4VWOnulFn50,41
@@ -125,7 +125,7 @@ hpcflow/sdk/persistence/pending.py,sha256=JB42Emk2cN639bPlNtQtoFhe5WXGWDJeFm7aFU
125
125
  hpcflow/sdk/persistence/store_resource.py,sha256=P-VZF7gMIsVvgOsHJEplK1Se4jHsaAqbdoKoQo-7_LU,5903
126
126
  hpcflow/sdk/persistence/types.py,sha256=c4vd8RW8bg8Meoz1HNXjGT8Za9Hpfl1Sxp4pLAgdV0g,9125
127
127
  hpcflow/sdk/persistence/utils.py,sha256=TuQp0BKwiy1-5WTgiMaYXn00bZb6HWHAjs4BBFE9khw,1804
128
- hpcflow/sdk/persistence/zarr.py,sha256=yCsiNteaok-eM9x5jogeDoCPdnIpx1Yv7LgYm8qoVeA,88267
128
+ hpcflow/sdk/persistence/zarr.py,sha256=ozJSx0W8L5yVRVvF7gM7M8psF5vd_-dNDnXNa-Rh3sI,88979
129
129
  hpcflow/sdk/runtime.py,sha256=vPNu4_DXYMnRyJenkVLq8yt6_XG4tCuwX69Oew4OWmo,11269
130
130
  hpcflow/sdk/submission/__init__.py,sha256=79xJXVkddsuj3uJz3BV9iOnAV7vCeJvLrhKccOA_dnU,67
131
131
  hpcflow/sdk/submission/enums.py,sha256=DykNHISQDmfa0oDggj6iIKugeFXJd8iF9NwqV3MN2oA,2049
@@ -146,7 +146,7 @@ hpcflow/sdk/typing.py,sha256=gu2h7a0RJs_r5UtV8AFVh9TNjo8kGe1n6UOxeH3zdgg,5060
146
146
  hpcflow/sdk/utils/arrays.py,sha256=7znIaqfl_bGElZ6U_da0vumWPez6ITyNrSDu_AlACDI,2084
147
147
  hpcflow/sdk/utils/deferred_file.py,sha256=LYIjTPjAmbOflwUaq_-o-O6DZJCQzRHX8dl_rIWTO5c,1381
148
148
  hpcflow/sdk/utils/hashing.py,sha256=40H7yO-oYgrt6RCdlTDDzJV9YIGiQeDx6zGpMJrRKZo,484
149
- hpcflow/sdk/utils/patches.py,sha256=jpbodcOjhE5ccrHPz5-L-r9yC7s2ymQccIdJi6Zd2Jo,398
149
+ hpcflow/sdk/utils/patches.py,sha256=sVK8TjeeIEBgngm5PobxkKcIiuDORLaOzI_8ua1H3bw,988
150
150
  hpcflow/sdk/utils/strings.py,sha256=MGdVeQpEZjo_9_VSMNI2ArAesQpgAx4-RkHL7IAQInQ,1966
151
151
  hpcflow/tests/api/test_api.py,sha256=h0HT9W0Jd1pChrXYaBOVwGThaI3slGkloS0sbq2YX88,962
152
152
  hpcflow/tests/conftest.py,sha256=xtSqhOxjZYioiAPvrKwf7NFicZoA4BR9Si4J1A8mWHw,4083
@@ -188,7 +188,7 @@ hpcflow/tests/unit/test_meta_task.py,sha256=FEWLuUrP-6e6BO0CRJe2fQDDOZUjz1jfljL_
188
188
  hpcflow/tests/unit/test_multi_path_sequences.py,sha256=6cVYD2e7jMt-m4sr3Z6ixnKewvs-s--slP344trKIWM,9079
189
189
  hpcflow/tests/unit/test_object_list.py,sha256=uWBou5QCyScaM3DLqL_7RCguMdYZKMkeZR0V8Lpterk,3342
190
190
  hpcflow/tests/unit/test_parameter.py,sha256=_ImfDhmroXzPKwIuKiehsAw9qQPPHC9FQa8jN7OJqoA,7440
191
- hpcflow/tests/unit/test_persistence.py,sha256=cwQoa2czgZ0ZDtCRoRSdkfoFMgMQ5qjnef_mhsMWzrc,17472
191
+ hpcflow/tests/unit/test_persistence.py,sha256=bpBiL4YPMosbZk18Ug1JRqYJ69HHJzDYUff_2wDi-5w,18544
192
192
  hpcflow/tests/unit/test_resources.py,sha256=48zC3A5YaSu0MlLlGiohowDpVZNgORsikVX8mP5agvI,8011
193
193
  hpcflow/tests/unit/test_run.py,sha256=jFRC1-wV5G2ZCEU-KBKYeHVnJdDBUXX6MbxTrB-RJSE,6205
194
194
  hpcflow/tests/unit/test_run_directories.py,sha256=VA2xhDRe7agKO7q1Y8kP0101BstkjLiC1RReKrMI6jE,1088
@@ -218,8 +218,8 @@ hpcflow/tests/workflows/test_submission.py,sha256=SUbBUbD8C8LSulrI7aETkzP9RqED48
218
218
  hpcflow/tests/workflows/test_workflows.py,sha256=9z3rtXjA5iMOp4C0q4TkD_9kLzwourCY-obpeOtnNt0,18927
219
219
  hpcflow/tests/workflows/test_zip.py,sha256=MzEwsIAYV_1A3bD0XRo23zUwUKVzkkmNd8_cil6YdWQ,578
220
220
  hpcflow/viz_demo.ipynb,sha256=6D9uBbWK3oMfbaf93Tnv5riFPtW-2miUTWNr9kGcnd4,228913
221
- hpcflow_new2-0.2.0a221.dist-info/LICENSE,sha256=Xhxf_KsrJNJFGMogumZhXSTPhUOVHCWf7nU-TDzqg0E,16763
222
- hpcflow_new2-0.2.0a221.dist-info/METADATA,sha256=GWE9KIO5a4WyYLPuIZ_mZcmBJhUbWZJIhrt5LnjjrC0,2663
223
- hpcflow_new2-0.2.0a221.dist-info/WHEEL,sha256=kLuE8m1WYU0Ig0_YEGrXyTtiJvKPpLpDEiChiNyei5Y,88
224
- hpcflow_new2-0.2.0a221.dist-info/entry_points.txt,sha256=aoGtCnFdfPcXfBdu2zZyMOJoz6fPgdR0elqsgrE-USU,106
225
- hpcflow_new2-0.2.0a221.dist-info/RECORD,,
221
+ hpcflow_new2-0.2.0a223.dist-info/LICENSE,sha256=Xhxf_KsrJNJFGMogumZhXSTPhUOVHCWf7nU-TDzqg0E,16763
222
+ hpcflow_new2-0.2.0a223.dist-info/METADATA,sha256=yMUy1XOWyJ9GWx93QPFA-RlqpLVVl7YeXikl2Vx7zIc,2663
223
+ hpcflow_new2-0.2.0a223.dist-info/WHEEL,sha256=kLuE8m1WYU0Ig0_YEGrXyTtiJvKPpLpDEiChiNyei5Y,88
224
+ hpcflow_new2-0.2.0a223.dist-info/entry_points.txt,sha256=aoGtCnFdfPcXfBdu2zZyMOJoz6fPgdR0elqsgrE-USU,106
225
+ hpcflow_new2-0.2.0a223.dist-info/RECORD,,