PyPI - h5netcdf - Versions diffs - 1.6.3__py3-none-any.whl → 1.7.0__py3-none-any.whl - Mend

h5netcdf 1.6.3__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of h5netcdf might be problematic. Click here for more details.

Files changed (14) hide show

h5netcdf/_version.py +16 -3
h5netcdf/attrs.py +23 -2
h5netcdf/core.py +193 -53
h5netcdf/dimensions.py +21 -5
h5netcdf/tests/conftest.py +12 -0
h5netcdf/tests/test_h5netcdf.py +345 -163
h5netcdf/utils.py +205 -0
{h5netcdf-1.6.3.dist-info → h5netcdf-1.7.0.dist-info}/METADATA +5 -5
h5netcdf-1.7.0.dist-info/RECORD +16 -0
h5netcdf-1.6.3.dist-info/RECORD +0 -16
{h5netcdf-1.6.3.dist-info → h5netcdf-1.7.0.dist-info}/WHEEL +0 -0
{h5netcdf-1.6.3.dist-info → h5netcdf-1.7.0.dist-info}/licenses/AUTHORS.txt +0 -0
{h5netcdf-1.6.3.dist-info → h5netcdf-1.7.0.dist-info}/licenses/LICENSE +0 -0
{h5netcdf-1.6.3.dist-info → h5netcdf-1.7.0.dist-info}/top_level.txt +0 -0

h5netcdf/tests/test_h5netcdf.py CHANGED Viewed

@@ -133,22 +133,27 @@ def is_h5py_char_working(tmp_netcdf, name):
             raise
-def write_legacy_netcdf(tmp_netcdf, write_module):
-    ds = write_module.Dataset(tmp_netcdf, "w")
-    ds.setncattr("global", 42)
+def write_legacy_netcdf(tmp_netcdf, write_module, format="NETCDF4"):
+    ds = write_module.Dataset(tmp_netcdf, mode="w", format=format)
+    intf = np.int64 if ds.data_model == "NETCDF4" else np.int32
+    ds.setncattr("global", intf(42))
     ds.other_attr = "yes"
     ds.createDimension("x", 4)
     ds.createDimension("y", 5)
     ds.createDimension("z", 6)
-    ds.createDimension("empty", 0)
     ds.createDimension("string3", 3)
     ds.createDimension("unlimited", None)
+    if ds.data_model == "NETCDF4":
+        # In the CLASSIC format, only one unlimited dimension is allowed
+        ds.createDimension("empty", 0)
     v = ds.createVariable("foo", float, ("x", "y"), chunksizes=(4, 5), zlib=True)
     v[...] = 1
     v.setncattr("units", "meters")
-    v = ds.createVariable("y", int, ("y",), fill_value=-1)
+    v = ds.createVariable("y", intf, ("y",), fill_value=intf(-1))
     v[:4] = np.arange(4)
     v = ds.createVariable("z", "S1", ("z", "string3"), fill_value=b"X")
@@ -158,7 +163,7 @@ def write_legacy_netcdf(tmp_netcdf, write_module):
     v[...] = 2.0
     # test creating a scalar with compression option (with should be ignored)
-    v = ds.createVariable("intscalar", np.int64, (), zlib=6, fill_value=None)
+    v = ds.createVariable("intscalar", intf, (), zlib=6, fill_value=None)
     v[...] = 2
     v = ds.createVariable("foo_unlimited", float, ("x", "unlimited"))
@@ -170,37 +175,60 @@ def write_legacy_netcdf(tmp_netcdf, write_module):
     ):
         ds.createVariable("boolean", np.bool_, ("x"))
-    g = ds.createGroup("subgroup")
-    v = g.createVariable("subvar", np.int32, ("x",))
-    v[...] = np.arange(4.0)
+    ds.createDimension("mismatched_dim", 1)
+    ds.createVariable("mismatched_dim", intf, ())
-    g.createDimension("y", 10)
-    g.createVariable("y_var", float, ("y",))
+    if ds.data_model == "NETCDF4":
+        g = ds.createGroup("subgroup")
+        v = g.createVariable("subvar", np.int32, ("x",))
+        v[...] = np.arange(4.0)
-    ds.createDimension("mismatched_dim", 1)
-    ds.createVariable("mismatched_dim", int, ())
+        g.createDimension("y", 10)
+        g.createVariable("y_var", float, ("y",))
-    v = ds.createVariable("var_len_str", str, ("x"))
-    v[0] = "foo"
+        v = ds.createVariable("var_len_str", str, ("x"))
+        v[0] = "foo"
-    enum_dict = dict(one=1, two=2, three=3, missing=255)
-    enum_type = ds.createEnumType(np.uint8, "enum_t", enum_dict)
-    v = ds.createVariable(
-        "enum_var",
-        enum_type,
-        ("x",),
-        fill_value=enum_dict["missing"],
-    )
-    v[0:3] = [1, 2, 3]
+        enum_dict = dict(one=1, two=2, three=3, missing=255)
+        enum_type = ds.createEnumType(np.uint8, "enum_t", enum_dict)
+        v = ds.createVariable(
+            "enum_var",
+            enum_type,
+            ("x",),
+            fill_value=enum_dict["missing"],
+        )
+        v[0:3] = [1, 2, 3]
     ds.close()
-def write_h5netcdf(tmp_netcdf, compression="gzip"):
-    ds = h5netcdf.File(tmp_netcdf, "w")
-    ds.attrs["global"] = 42
+def write_h5netcdf(tmp_netcdf, compression="gzip", format="NETCDF4"):
+    ds = h5netcdf.File(tmp_netcdf, mode="w", format=format)
+    intf = np.int64 if ds.data_model == "NETCDF4" else np.int32
+    ds.attrs["global"] = intf(42)
     ds.attrs["other_attr"] = "yes"
-    ds.dimensions = {"x": 4, "y": 5, "z": 6, "empty": 0, "unlimited": None}
+    if ds.data_model == "NETCDF4_CLASSIC":
+        with raises(
+            CompatibilityError,
+            match="NETCDF4_CLASSIC format only allows one unlimited dimension.",
+        ):
+            ds.dimensions = {"x": 4, "y": 5, "z": 6, "unlimited": None, "empty": 0}
+    ds.dimensions = {"x": 4, "y": 5, "z": 6, "unlimited": None}
+    if ds.data_model == "NETCDF4":
+        ds.dimensions["empty"] = 0
+    if ds.data_model == "NETCDF4_CLASSIC":
+        with raises(
+            CompatibilityError,
+            match="Only one unlimited dimension allowed in the NETCDF4_CLASSIC format.",
+        ):
+            ds.dimensions["empty"] = 0
+        with raises(CompatibilityError, match=r"int64 \(CLASSIC\) dtypes"):
+            ds.attrs["int64_attr"] = 42
     v = ds.create_variable(
         "foo", ("x", "y"), float, chunks=(4, 5), compression=compression, shuffle=True
@@ -210,14 +238,14 @@ def write_h5netcdf(tmp_netcdf, compression="gzip"):
     remote_file = isinstance(tmp_netcdf, str) and tmp_netcdf.startswith(remote_h5)
     if not remote_file:
-        v = ds.create_variable("y", ("y",), int, fillvalue=-1)
+        v = ds.create_variable("y", ("y",), intf, fillvalue=intf(-1))
         v[:4] = np.arange(4)
     v = ds.create_variable("z", ("z", "string3"), data=_char_array, fillvalue=b"X")
     v = ds.create_variable("scalar", data=np.float32(2.0))
-    v = ds.create_variable("intscalar", data=np.int64(2))
+    v = ds.create_variable("intscalar", data=intf(2))
     v = ds.create_variable("foo_unlimited", ("x", "unlimited"), float)
     v[...] = 1
@@ -225,36 +253,40 @@ def write_h5netcdf(tmp_netcdf, compression="gzip"):
     with raises((h5netcdf.CompatibilityError, TypeError)):
         ds.create_variable("boolean", data=True)
-    g = ds.create_group("subgroup")
-    v = g.create_variable("subvar", ("x",), np.int32)
-    v[...] = np.arange(4.0)
-    with raises(AttributeError):
-        v.attrs["_Netcdf4Dimid"] = -1
-    g.dimensions["y"] = 10
-    g.create_variable("y_var", ("y",), float)
-    g.flush()
     ds.dimensions["mismatched_dim"] = 1
-    ds.create_variable("mismatched_dim", dtype=int)
+    ds.create_variable("mismatched_dim", dtype=intf)
     ds.flush()
-    dt = h5py.special_dtype(vlen=str)
-    v = ds.create_variable("var_len_str", ("x",), dtype=dt)
-    v[0] = _vlen_string
+    if ds.data_model == "NETCDF4":
+        g = ds.create_group("subgroup")
+        v = g.create_variable("subvar", ("x",), np.int32)
+        v[...] = np.arange(4.0)
+        with raises(AttributeError):
+            v.attrs["_Netcdf4Dimid"] = -1
-    enum_dict = dict(one=1, two=2, three=3, missing=255)
-    enum_type = ds.create_enumtype(np.uint8, "enum_t", enum_dict)
-    v = ds.create_variable(
-        "enum_var", ("x",), dtype=enum_type, fillvalue=enum_dict["missing"]
-    )
-    v[0:3] = [1, 2, 3]
+        g.dimensions["y"] = 10
+        g.create_variable("y_var", ("y",), float)
+        g.flush()
+        dt = h5py.special_dtype(vlen=str)
+        v = ds.create_variable("var_len_str", ("x",), dtype=dt)
+        v[0] = _vlen_string
+        enum_dict = dict(one=1, two=2, three=3, missing=255)
+        enum_type = ds.create_enumtype(np.uint8, "enum_t", enum_dict)
+        v = ds.create_variable(
+            "enum_var", ("x",), dtype=enum_type, fillvalue=enum_dict["missing"]
+        )
+        v[0:3] = [1, 2, 3]
     ds.close()
 def read_legacy_netcdf(tmp_netcdf, read_module, write_module):
     ds = read_module.Dataset(tmp_netcdf, "r")
+    intf = np.int64 if ds.data_model == "NETCDF4" else np.int32
     assert ds.ncattrs() == ["global", "other_attr"]
     assert ds.getncattr("global") == 42
     if write_module is not netCDF4:
@@ -262,48 +294,60 @@ def read_legacy_netcdf(tmp_netcdf, read_module, write_module):
         assert ds.other_attr == "yes"
     with raises(AttributeError, match="not found"):
         ds.does_not_exist
-    assert set(ds.dimensions) == set(
-        ["x", "y", "z", "empty", "string3", "mismatched_dim", "unlimited"]
-    )
-    assert set(ds.variables) == set(
-        [
+    dimensions = {
+        "x",
+        "y",
+        "z",
+        "string3",
+        "mismatched_dim",
+        "unlimited",
+    }
+    variables = {
+        "foo",
+        "y",
+        "z",
+        "intscalar",
+        "scalar",
+        "foo_unlimited",
+        "mismatched_dim",
+    }
+    if ds.data_model == "NETCDF4":
+        dimensions |= {"empty"}
+        variables |= {
             "enum_var",
-            "foo",
-            "y",
-            "z",
-            "intscalar",
-            "scalar",
             "var_len_str",
-            "mismatched_dim",
-            "foo_unlimited",
-        ]
-    )
+        }
-    assert set(ds.enumtypes) == set(["enum_t"])
+    assert set(ds.dimensions) == dimensions
+    assert set(ds.variables) == variables
-    assert set(ds.groups) == set(["subgroup"])
-    assert ds.parent is None
-    v = ds.variables["foo"]
-    assert array_equal(v, np.ones((4, 5)))
-    assert v.dtype == float
-    assert v.dimensions == ("x", "y")
-    assert v.ndim == 2
-    assert v.ncattrs() == ["units"]
-    if write_module is not netCDF4:
-        assert v.getncattr("units") == "meters"
-    assert tuple(v.chunking()) == (4, 5)
+    if ds.data_model == "NETCDF4":
+        assert set(ds.enumtypes) == {"enum_t"}
-    # check for dict items separately
-    # see https://github.com/h5netcdf/h5netcdf/issues/171
-    filters = v.filters()
-    assert filters["complevel"] == 4
-    assert filters["fletcher32"] is False
-    assert filters["shuffle"] is True
-    assert filters["zlib"] is True
+        assert set(ds.groups) == {"subgroup"}
+        assert ds.parent is None
+        v = ds.variables["foo"]
+        assert array_equal(v, np.ones((4, 5)))
+        assert v.dtype == float
+        assert v.dimensions == ("x", "y")
+        assert v.ndim == 2
+        assert v.ncattrs() == ["units"]
+        if write_module is not netCDF4:
+            assert v.getncattr("units") == "meters"
+        assert tuple(v.chunking()) == (4, 5)
+        # check for dict items separately
+        # see https://github.com/h5netcdf/h5netcdf/issues/171
+        filters = v.filters()
+        assert filters["complevel"] == 4
+        assert filters["fletcher32"] is False
+        assert filters["shuffle"] is True
+        assert filters["zlib"] is True
     v = ds.variables["y"]
     assert array_equal(v, np.r_[np.arange(4), [-1]])
-    assert v.dtype == int
+    assert v.dtype == intf
     assert v.dimensions == ("y",)
     assert v.ndim == 1
     assert v.ncattrs() == ["_FillValue"]
@@ -343,32 +387,33 @@ def read_legacy_netcdf(tmp_netcdf, read_module, write_module):
     v = ds.variables["intscalar"]
     assert array_equal(v, np.array(2))
-    assert v.dtype == "int64"
+    assert v.dtype == intf
     assert v.ndim == 0
     assert v.dimensions == ()
     assert v.ncattrs() == []
-    v = ds.variables["var_len_str"]
-    assert v.dtype == str
-    assert v[0] == _vlen_string
+    if ds.data_model == "NETCDF4":
+        v = ds.variables["var_len_str"]
+        assert v.dtype == str
+        assert v[0] == _vlen_string
-    v = ds.groups["subgroup"].variables["subvar"]
-    assert ds.groups["subgroup"].parent is ds
-    assert array_equal(v, np.arange(4.0))
-    assert v.dtype == "int32"
-    assert v.ndim == 1
-    assert v.dimensions == ("x",)
-    assert v.ncattrs() == []
+        v = ds.groups["subgroup"].variables["subvar"]
+        assert ds.groups["subgroup"].parent is ds
+        assert array_equal(v, np.arange(4.0))
+        assert v.dtype == "int32"
+        assert v.ndim == 1
+        assert v.dimensions == ("x",)
+        assert v.ncattrs() == []
-    v = ds.groups["subgroup"].variables["y_var"]
-    assert v.shape == (10,)
-    assert "y" in ds.groups["subgroup"].dimensions
+        v = ds.groups["subgroup"].variables["y_var"]
+        assert v.shape == (10,)
+        assert "y" in ds.groups["subgroup"].dimensions
-    enum_dict = dict(one=1, two=2, three=3, missing=255)
-    enum_type = ds.enumtypes["enum_t"]
-    assert enum_type.enum_dict == enum_dict
-    v = ds.variables["enum_var"]
-    assert array_equal(v, np.ma.masked_equal([1, 2, 3, 255], 255))
+        enum_dict = dict(one=1, two=2, three=3, missing=255)
+        enum_type = ds.enumtypes["enum_t"]
+        assert enum_type.enum_dict == enum_dict
+        v = ds.variables["enum_var"]
+        assert array_equal(v, np.ma.masked_equal([1, 2, 3, 255], 255))
     ds.close()
@@ -376,33 +421,41 @@ def read_legacy_netcdf(tmp_netcdf, read_module, write_module):
 def read_h5netcdf(tmp_netcdf, write_module, decode_vlen_strings):
     remote_file = isinstance(tmp_netcdf, str) and tmp_netcdf.startswith(remote_h5)
     ds = h5netcdf.File(tmp_netcdf, "r", **decode_vlen_strings)
+    intf = np.int64 if ds.data_model == "NETCDF4" else np.int32
     assert ds.name == "/"
     assert list(ds.attrs) == ["global", "other_attr"]
     assert ds.attrs["global"] == 42
     if write_module is not netCDF4:
         # skip for now: https://github.com/Unidata/netcdf4-python/issues/388
         assert ds.attrs["other_attr"] == "yes"
-    assert set(ds.dimensions) == set(
-        ["x", "y", "z", "empty", "string3", "mismatched_dim", "unlimited"]
-    )
-    variables = set(
-        [
-            "enum_var",
-            "foo",
-            "z",
-            "intscalar",
-            "scalar",
-            "var_len_str",
-            "mismatched_dim",
-            "foo_unlimited",
-        ]
-    )
+    dimensions = {
+        "x",
+        "y",
+        "z",
+        "string3",
+        "mismatched_dim",
+        "unlimited",
+    }
+    variables = {
+        "foo",
+        "z",
+        "intscalar",
+        "scalar",
+        "foo_unlimited",
+        "mismatched_dim",
+    }
     # fix current failure of hsds/h5pyd
     if not remote_file:
-        variables |= set(["y"])
+        variables |= {"y"}
+    if ds.data_model == "NETCDF4":
+        dimensions |= {"empty"}
+        variables |= {"enum_var", "var_len_str"}
+    assert set(ds.dimensions) == dimensions
     assert set(ds.variables) == variables
-    assert set(ds.groups) == set(["subgroup"])
     assert ds.parent is None
     v = ds["foo"]
@@ -424,7 +477,7 @@ def read_h5netcdf(tmp_netcdf, write_module, decode_vlen_strings):
     if not remote_file:
         v = ds["y"]
         assert array_equal(v, np.r_[np.arange(4), [-1]])
-        assert v.dtype == int
+        assert v.dtype == intf
         assert v.dimensions == ("y",)
         assert v.ndim == 1
         assert list(v.attrs) == ["_FillValue"]
@@ -458,45 +511,48 @@ def read_h5netcdf(tmp_netcdf, write_module, decode_vlen_strings):
     v = ds.variables["intscalar"]
     assert array_equal(v, np.array(2))
-    assert v.dtype == "int64"
+    assert v.dtype == intf
     assert v.ndim == 0
     assert v.dimensions == ()
     assert list(v.attrs) == []
-    v = ds["var_len_str"]
-    assert h5py.check_dtype(vlen=v.dtype) is str
-    if getattr(ds, "decode_vlen_strings", True):
-        assert v[0] == _vlen_string
-    else:
-        assert v[0] == _vlen_string.encode("utf_8")
-    v = ds["/subgroup/subvar"]
-    assert v is ds["subgroup"]["subvar"]
-    assert v is ds["subgroup/subvar"]
-    assert v is ds["subgroup"]["/subgroup/subvar"]
-    assert v.name == "/subgroup/subvar"
-    assert ds["subgroup"].name == "/subgroup"
-    assert ds["subgroup"].parent is ds
-    assert array_equal(v, np.arange(4.0))
-    assert v.dtype == "int32"
-    assert v.ndim == 1
-    assert v.dimensions == ("x",)
-    assert list(v.attrs) == []
+    if ds.data_model == "NETCDF4":
+        assert set(ds.groups) == {"subgroup"}
-    assert ds["/subgroup/y_var"].shape == (10,)
-    assert ds["/subgroup"].dimensions["y"].size == 10
+        v = ds["var_len_str"]
+        assert h5py.check_dtype(vlen=v.dtype) is str
+        if getattr(ds, "decode_vlen_strings", True):
+            assert v[0] == _vlen_string
+        else:
+            assert v[0] == _vlen_string.encode("utf_8")
+        v = ds["/subgroup/subvar"]
+        assert v is ds["subgroup"]["subvar"]
+        assert v is ds["subgroup/subvar"]
+        assert v is ds["subgroup"]["/subgroup/subvar"]
+        assert v.name == "/subgroup/subvar"
+        assert ds["subgroup"].name == "/subgroup"
+        assert ds["subgroup"].parent is ds
+        assert array_equal(v, np.arange(4.0))
+        assert v.dtype == "int32"
+        assert v.ndim == 1
+        assert v.dimensions == ("x",)
+        assert list(v.attrs) == []
-    enum_dict = dict(one=1, two=2, three=3, missing=255)
-    enum_type = ds.enumtypes["enum_t"]
-    assert enum_type.enum_dict == enum_dict
-    v = ds.variables["enum_var"]
-    assert array_equal(v, np.ma.masked_equal([1, 2, 3, 255], 255))
+        assert ds["/subgroup/y_var"].shape == (10,)
+        assert ds["/subgroup"].dimensions["y"].size == 10
+        enum_dict = dict(one=1, two=2, three=3, missing=255)
+        enum_type = ds.enumtypes["enum_t"]
+        assert enum_type.enum_dict == enum_dict
+        v = ds.variables["enum_var"]
+        assert array_equal(v, np.ma.masked_equal([1, 2, 3, 255], 255))
     ds.close()
-def roundtrip_legacy_netcdf(tmp_netcdf, read_module, write_module):
-    write_legacy_netcdf(tmp_netcdf, write_module)
+def roundtrip_legacy_netcdf(tmp_netcdf, read_module, write_module, format="NETCDF4"):
+    write_legacy_netcdf(tmp_netcdf, write_module, format)
     read_legacy_netcdf(tmp_netcdf, read_module, write_module)
@@ -517,13 +573,15 @@ def test_write_h5netcdf_read_legacyapi(tmp_local_netcdf):
     read_legacy_netcdf(tmp_local_netcdf, legacyapi, h5netcdf)
-def test_write_h5netcdf_read_netCDF4(tmp_local_netcdf):
-    write_h5netcdf(tmp_local_netcdf)
+def test_write_h5netcdf_read_netCDF4(tmp_local_netcdf, data_model):
+    write_h5netcdf(tmp_local_netcdf, **data_model)
     read_legacy_netcdf(tmp_local_netcdf, netCDF4, h5netcdf)
-def test_roundtrip_h5netcdf(tmp_local_or_remote_netcdf, decode_vlen_strings):
-    write_h5netcdf(tmp_local_or_remote_netcdf)
+def test_roundtrip_h5netcdf(
+    tmp_local_or_remote_netcdf, decode_vlen_strings, data_model
+):
+    write_h5netcdf(tmp_local_or_remote_netcdf, **data_model)
     read_h5netcdf(tmp_local_or_remote_netcdf, h5netcdf, decode_vlen_strings)
@@ -532,8 +590,8 @@ def test_write_compression_as_zlib(tmp_local_netcdf):
     read_legacy_netcdf(tmp_local_netcdf, netCDF4, h5netcdf)
-def test_write_netCDF4_read_h5netcdf(tmp_local_netcdf, decode_vlen_strings):
-    write_legacy_netcdf(tmp_local_netcdf, netCDF4)
+def test_write_netCDF4_read_h5netcdf(tmp_local_netcdf, decode_vlen_strings, data_model):
+    write_legacy_netcdf(tmp_local_netcdf, netCDF4, **data_model)
     read_h5netcdf(tmp_local_netcdf, netCDF4, decode_vlen_strings)
@@ -542,6 +600,22 @@ def test_write_legacyapi_read_h5netcdf(tmp_local_netcdf, decode_vlen_strings):
     read_h5netcdf(tmp_local_netcdf, legacyapi, decode_vlen_strings)
+@pytest.mark.parametrize("strict", [True, False])
+@pytest.mark.parametrize("dataset", [None, "enum_var"])
+@pytest.mark.xfail(reason="Differences between netcdf4/h5netcdf")
+def test_dump_netcdf4_vs_h5netcdf(
+    tmp_local_netcdf, dataset, h5dump, data_model, strict
+):
+    """Check that the generated file is identical to netCDF4 by comparing h5dump output."""
+    write_legacy_netcdf(tmp_local_netcdf, netCDF4, **data_model)
+    expected = h5dump(tmp_local_netcdf, dataset=dataset, strict=strict)
+    write_legacy_netcdf(tmp_local_netcdf, legacyapi, **data_model)
+    actual = h5dump(tmp_local_netcdf, dataset=dataset, strict=strict)
+    assert actual == expected
 def test_fileobj(decode_vlen_strings):
     fileobj = tempfile.TemporaryFile()
     write_h5netcdf(fileobj)
@@ -845,13 +919,13 @@ def test_hierarchical_access_auto_create(tmp_local_or_remote_netcdf):
     ds.create_variable("/foo/bar", data=1)
     g = ds.create_group("foo/baz")
     g.create_variable("/foo/hello", data=2)
-    assert set(ds) == set(["foo"])
-    assert set(ds["foo"]) == set(["bar", "baz", "hello"])
+    assert set(ds) == {"foo"}
+    assert set(ds["foo"]) == {"bar", "baz", "hello"}
     ds.close()
     ds = h5netcdf.File(tmp_local_or_remote_netcdf, "r")
-    assert set(ds) == set(["foo"])
-    assert set(ds["foo"]) == set(["bar", "baz", "hello"])
+    assert set(ds) == {"foo"}
+    assert set(ds["foo"]) == {"bar", "baz", "hello"}
     ds.close()
@@ -1231,6 +1305,12 @@ def test_reading_unlimited_dimensions_created_with_c_api(tmp_local_netcdf):
         f.groups["test"]["dummy4"].shape == (0, 0)
         assert f["dummy5"].shape == (2, 3)
+        # regression test for https://github.com/pydata/xarray/issues/10818
+        # h5netcdf issue https://github.com/h5netcdf/h5netcdf/issues/287
+        # slicing a variable with slice exceeding it's shape
+        # should return only up to shape-size
+        assert f["dummy1"][:10, :2].shape == (2, 2)
 def test_reading_unused_unlimited_dimension(tmp_local_or_remote_netcdf):
     """Test reading a file with unused dimension of unlimited size"""
@@ -2274,9 +2354,7 @@ def test_vlen_string_dataset_fillvalue(tmp_local_netcdf, decode_vlen_strings):
     "ros3" not in h5py.registered_drivers(), reason="ros3 not available"
 )
 def test_ros3():
-    fname = (
-        "https://www.unidata.ucar.edu/software/netcdf/examples/OMI-Aura_L2-example.nc"
-    )
+    fname = "https://archive.unidata.ucar.edu/software/netcdf/examples/OMI-Aura_L2-example.nc"
     f = h5netcdf.File(fname, "r", driver="ros3")
     assert "Temperature" in list(f)
     f.close()
@@ -2817,3 +2895,107 @@ def test_h5pyd_append(hsds_up):
     with h5netcdf.File(fname, "a", driver="h5pyd") as ds:
         assert ds._preexisting_file
+def test_raise_on_closed_file(tmp_local_netcdf):
+    f = h5netcdf.File(tmp_local_netcdf, "w")
+    f.dimensions = {"x": 5}
+    v = f.create_variable("hello", ("x",), float)
+    v[:] = np.ones(5)
+    f.close()
+    with pytest.raises(
+        ValueError,
+        match=f"I/O operation on <Closed h5netcdf.File>: '{tmp_local_netcdf}'",
+    ):
+        print(v[:])
+def write_legacy_string_array(tmp_netcdf, write_module, format):
+    ds = write_module.Dataset(tmp_netcdf, mode="w", format=format)
+    # we do not handle "_Encoding"
+    if write_module == netCDF4:
+        ds.set_auto_chartostring(False)
+    data = np.array(
+        [
+            [b"apple ", b"berry ", b"cherry", b"dates ", b"elder "],
+            [b"fig   ", b"grape ", b"honey ", b"iris  ", b"jelly "],
+            [b"kiwi  ", b"lemon ", b"mango ", b"nectar", b"olive "],
+            [b"peach ", b"quince", b"raisin", b"salak ", b"tomat "],
+        ],
+        dtype="S6",
+    )
+    data = string_to_char(data)
+    ds.createDimension("n1", None)
+    ds.createDimension("n2", 5)
+    ds.createDimension("nchar", 6)
+    v = ds.createVariable("strings", "S1", ("n1", "n2", "nchar"))
+    # netCDF4 can't resize with incomplete slices and unfitting dimensions
+    if write_module == netCDF4:
+        v[...] = data
+    v[:-1] = data[:-1]
+    v[-1] = data[-1]
+    v[-1, -1] = data[-1, -1]
+    ds.close()
+@pytest.mark.parametrize("strict", [True, False])
+@pytest.mark.xfail(reason="Differences between netcdf4/h5netcdf")
+def test_dump_string_array(tmp_local_netcdf, h5dump, data_model, strict):
+    write_legacy_string_array(tmp_local_netcdf, netCDF4, **data_model)
+    expected = h5dump(tmp_local_netcdf, strict=strict)
+    write_legacy_string_array(tmp_local_netcdf, legacyapi, **data_model)
+    actual = h5dump(tmp_local_netcdf, strict=strict)
+    assert actual == expected
+def maybe_resize_with_broadcasting(tmp_netcdf, write_module, data_model):
+    ds = write_module.Dataset(tmp_netcdf, mode="w", **data_model)
+    n1, n2, n3 = 4, 5, 6
+    data = np.arange(n1 * n2 * n3).reshape((n1, n2, n3))
+    ds.createDimension("n1", None)
+    ds.createDimension("n2", n2)
+    ds.createDimension("n3", n3)
+    v = ds.createVariable("numbers", "i4", ("n1", "n2", "n3"))
+    # netcdf4-python doe not handle this, need to write once with full data
+    if write_module == netCDF4:
+        v[:] = data
+    v[:-1] = data[:-1]
+    v[-1] = data[-1]
+    v[-1, -1] = data[-1, -1]
+    ds.close()
+@pytest.mark.parametrize("dataset", [None, "numbers"])
+@pytest.mark.parametrize("strict", [True, False])
+@pytest.mark.xfail(reason="Differences between netcdf4/h5netcdf")
+def test_dump_maybe_resize_with_broadcasting(
+    tmp_local_netcdf, data_model, h5dump, dataset, strict
+):
+    maybe_resize_with_broadcasting(tmp_local_netcdf, netCDF4, data_model)
+    expected = h5dump(tmp_local_netcdf, strict=strict, dataset=dataset)
+    maybe_resize_with_broadcasting(tmp_local_netcdf, legacyapi, data_model)
+    actual = h5dump(tmp_local_netcdf, strict=strict, dataset=dataset)
+    assert actual == expected
+def test_is_classic(tmp_local_netcdf):
+    """Check that the generated file is recognized as netCDF-4 classic model by ncdump."""
+    import subprocess
+    write_h5netcdf(tmp_local_netcdf, format="NETCDF4_CLASSIC")
+    out = subprocess.run(["ncdump", "-k", tmp_local_netcdf], capture_output=True)
+    assert out.stdout.decode().strip() == "netCDF-4 classic model"

h5netcdf 1.6.3__py3-none-any.whl → 1.7.0__py3-none-any.whl

Potentially problematic release.

h5netcdf 1.6.3__py3-none-any.whl → 1.7.0__py3-none-any.whl