h5netcdf 1.6.3__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of h5netcdf might be problematic. Click here for more details.

@@ -133,22 +133,27 @@ def is_h5py_char_working(tmp_netcdf, name):
133
133
  raise
134
134
 
135
135
 
136
- def write_legacy_netcdf(tmp_netcdf, write_module):
137
- ds = write_module.Dataset(tmp_netcdf, "w")
138
- ds.setncattr("global", 42)
136
+ def write_legacy_netcdf(tmp_netcdf, write_module, format="NETCDF4"):
137
+ ds = write_module.Dataset(tmp_netcdf, mode="w", format=format)
138
+ intf = np.int64 if ds.data_model == "NETCDF4" else np.int32
139
+
140
+ ds.setncattr("global", intf(42))
139
141
  ds.other_attr = "yes"
140
142
  ds.createDimension("x", 4)
141
143
  ds.createDimension("y", 5)
142
144
  ds.createDimension("z", 6)
143
- ds.createDimension("empty", 0)
144
145
  ds.createDimension("string3", 3)
145
146
  ds.createDimension("unlimited", None)
146
147
 
148
+ if ds.data_model == "NETCDF4":
149
+ # In the CLASSIC format, only one unlimited dimension is allowed
150
+ ds.createDimension("empty", 0)
151
+
147
152
  v = ds.createVariable("foo", float, ("x", "y"), chunksizes=(4, 5), zlib=True)
148
153
  v[...] = 1
149
154
  v.setncattr("units", "meters")
150
155
 
151
- v = ds.createVariable("y", int, ("y",), fill_value=-1)
156
+ v = ds.createVariable("y", intf, ("y",), fill_value=intf(-1))
152
157
  v[:4] = np.arange(4)
153
158
 
154
159
  v = ds.createVariable("z", "S1", ("z", "string3"), fill_value=b"X")
@@ -158,7 +163,7 @@ def write_legacy_netcdf(tmp_netcdf, write_module):
158
163
  v[...] = 2.0
159
164
 
160
165
  # test creating a scalar with compression option (which should be ignored)
161
- v = ds.createVariable("intscalar", np.int64, (), zlib=6, fill_value=None)
166
+ v = ds.createVariable("intscalar", intf, (), zlib=6, fill_value=None)
162
167
  v[...] = 2
163
168
 
164
169
  v = ds.createVariable("foo_unlimited", float, ("x", "unlimited"))
@@ -170,37 +175,60 @@ def write_legacy_netcdf(tmp_netcdf, write_module):
170
175
  ):
171
176
  ds.createVariable("boolean", np.bool_, ("x"))
172
177
 
173
- g = ds.createGroup("subgroup")
174
- v = g.createVariable("subvar", np.int32, ("x",))
175
- v[...] = np.arange(4.0)
178
+ ds.createDimension("mismatched_dim", 1)
179
+ ds.createVariable("mismatched_dim", intf, ())
176
180
 
177
- g.createDimension("y", 10)
178
- g.createVariable("y_var", float, ("y",))
181
+ if ds.data_model == "NETCDF4":
182
+ g = ds.createGroup("subgroup")
183
+ v = g.createVariable("subvar", np.int32, ("x",))
184
+ v[...] = np.arange(4.0)
179
185
 
180
- ds.createDimension("mismatched_dim", 1)
181
- ds.createVariable("mismatched_dim", int, ())
186
+ g.createDimension("y", 10)
187
+ g.createVariable("y_var", float, ("y",))
182
188
 
183
- v = ds.createVariable("var_len_str", str, ("x"))
184
- v[0] = "foo"
189
+ v = ds.createVariable("var_len_str", str, ("x"))
190
+ v[0] = "foo"
185
191
 
186
- enum_dict = dict(one=1, two=2, three=3, missing=255)
187
- enum_type = ds.createEnumType(np.uint8, "enum_t", enum_dict)
188
- v = ds.createVariable(
189
- "enum_var",
190
- enum_type,
191
- ("x",),
192
- fill_value=enum_dict["missing"],
193
- )
194
- v[0:3] = [1, 2, 3]
192
+ enum_dict = dict(one=1, two=2, three=3, missing=255)
193
+ enum_type = ds.createEnumType(np.uint8, "enum_t", enum_dict)
194
+ v = ds.createVariable(
195
+ "enum_var",
196
+ enum_type,
197
+ ("x",),
198
+ fill_value=enum_dict["missing"],
199
+ )
200
+ v[0:3] = [1, 2, 3]
195
201
 
196
202
  ds.close()
197
203
 
198
204
 
199
- def write_h5netcdf(tmp_netcdf, compression="gzip"):
200
- ds = h5netcdf.File(tmp_netcdf, "w")
201
- ds.attrs["global"] = 42
205
+ def write_h5netcdf(tmp_netcdf, compression="gzip", format="NETCDF4"):
206
+ ds = h5netcdf.File(tmp_netcdf, mode="w", format=format)
207
+ intf = np.int64 if ds.data_model == "NETCDF4" else np.int32
208
+ ds.attrs["global"] = intf(42)
202
209
  ds.attrs["other_attr"] = "yes"
203
- ds.dimensions = {"x": 4, "y": 5, "z": 6, "empty": 0, "unlimited": None}
210
+
211
+ if ds.data_model == "NETCDF4_CLASSIC":
212
+ with raises(
213
+ CompatibilityError,
214
+ match="NETCDF4_CLASSIC format only allows one unlimited dimension.",
215
+ ):
216
+ ds.dimensions = {"x": 4, "y": 5, "z": 6, "unlimited": None, "empty": 0}
217
+
218
+ ds.dimensions = {"x": 4, "y": 5, "z": 6, "unlimited": None}
219
+
220
+ if ds.data_model == "NETCDF4":
221
+ ds.dimensions["empty"] = 0
222
+
223
+ if ds.data_model == "NETCDF4_CLASSIC":
224
+ with raises(
225
+ CompatibilityError,
226
+ match="Only one unlimited dimension allowed in the NETCDF4_CLASSIC format.",
227
+ ):
228
+ ds.dimensions["empty"] = 0
229
+
230
+ with raises(CompatibilityError, match=r"int64 \(CLASSIC\) dtypes"):
231
+ ds.attrs["int64_attr"] = 42
204
232
 
205
233
  v = ds.create_variable(
206
234
  "foo", ("x", "y"), float, chunks=(4, 5), compression=compression, shuffle=True
@@ -210,14 +238,14 @@ def write_h5netcdf(tmp_netcdf, compression="gzip"):
210
238
 
211
239
  remote_file = isinstance(tmp_netcdf, str) and tmp_netcdf.startswith(remote_h5)
212
240
  if not remote_file:
213
- v = ds.create_variable("y", ("y",), int, fillvalue=-1)
241
+ v = ds.create_variable("y", ("y",), intf, fillvalue=intf(-1))
214
242
  v[:4] = np.arange(4)
215
243
 
216
244
  v = ds.create_variable("z", ("z", "string3"), data=_char_array, fillvalue=b"X")
217
245
 
218
246
  v = ds.create_variable("scalar", data=np.float32(2.0))
219
247
 
220
- v = ds.create_variable("intscalar", data=np.int64(2))
248
+ v = ds.create_variable("intscalar", data=intf(2))
221
249
 
222
250
  v = ds.create_variable("foo_unlimited", ("x", "unlimited"), float)
223
251
  v[...] = 1
@@ -225,36 +253,40 @@ def write_h5netcdf(tmp_netcdf, compression="gzip"):
225
253
  with raises((h5netcdf.CompatibilityError, TypeError)):
226
254
  ds.create_variable("boolean", data=True)
227
255
 
228
- g = ds.create_group("subgroup")
229
- v = g.create_variable("subvar", ("x",), np.int32)
230
- v[...] = np.arange(4.0)
231
- with raises(AttributeError):
232
- v.attrs["_Netcdf4Dimid"] = -1
233
-
234
- g.dimensions["y"] = 10
235
- g.create_variable("y_var", ("y",), float)
236
- g.flush()
237
-
238
256
  ds.dimensions["mismatched_dim"] = 1
239
- ds.create_variable("mismatched_dim", dtype=int)
257
+ ds.create_variable("mismatched_dim", dtype=intf)
240
258
  ds.flush()
241
259
 
242
- dt = h5py.special_dtype(vlen=str)
243
- v = ds.create_variable("var_len_str", ("x",), dtype=dt)
244
- v[0] = _vlen_string
260
+ if ds.data_model == "NETCDF4":
261
+ g = ds.create_group("subgroup")
262
+ v = g.create_variable("subvar", ("x",), np.int32)
263
+ v[...] = np.arange(4.0)
264
+ with raises(AttributeError):
265
+ v.attrs["_Netcdf4Dimid"] = -1
245
266
 
246
- enum_dict = dict(one=1, two=2, three=3, missing=255)
247
- enum_type = ds.create_enumtype(np.uint8, "enum_t", enum_dict)
248
- v = ds.create_variable(
249
- "enum_var", ("x",), dtype=enum_type, fillvalue=enum_dict["missing"]
250
- )
251
- v[0:3] = [1, 2, 3]
267
+ g.dimensions["y"] = 10
268
+ g.create_variable("y_var", ("y",), float)
269
+ g.flush()
270
+
271
+ dt = h5py.special_dtype(vlen=str)
272
+ v = ds.create_variable("var_len_str", ("x",), dtype=dt)
273
+ v[0] = _vlen_string
274
+
275
+ enum_dict = dict(one=1, two=2, three=3, missing=255)
276
+ enum_type = ds.create_enumtype(np.uint8, "enum_t", enum_dict)
277
+ v = ds.create_variable(
278
+ "enum_var", ("x",), dtype=enum_type, fillvalue=enum_dict["missing"]
279
+ )
280
+ v[0:3] = [1, 2, 3]
252
281
 
253
282
  ds.close()
254
283
 
255
284
 
256
285
  def read_legacy_netcdf(tmp_netcdf, read_module, write_module):
257
286
  ds = read_module.Dataset(tmp_netcdf, "r")
287
+
288
+ intf = np.int64 if ds.data_model == "NETCDF4" else np.int32
289
+
258
290
  assert ds.ncattrs() == ["global", "other_attr"]
259
291
  assert ds.getncattr("global") == 42
260
292
  if write_module is not netCDF4:
@@ -262,48 +294,60 @@ def read_legacy_netcdf(tmp_netcdf, read_module, write_module):
262
294
  assert ds.other_attr == "yes"
263
295
  with raises(AttributeError, match="not found"):
264
296
  ds.does_not_exist
265
- assert set(ds.dimensions) == set(
266
- ["x", "y", "z", "empty", "string3", "mismatched_dim", "unlimited"]
267
- )
268
- assert set(ds.variables) == set(
269
- [
297
+ dimensions = {
298
+ "x",
299
+ "y",
300
+ "z",
301
+ "string3",
302
+ "mismatched_dim",
303
+ "unlimited",
304
+ }
305
+ variables = {
306
+ "foo",
307
+ "y",
308
+ "z",
309
+ "intscalar",
310
+ "scalar",
311
+ "foo_unlimited",
312
+ "mismatched_dim",
313
+ }
314
+ if ds.data_model == "NETCDF4":
315
+ dimensions |= {"empty"}
316
+ variables |= {
270
317
  "enum_var",
271
- "foo",
272
- "y",
273
- "z",
274
- "intscalar",
275
- "scalar",
276
318
  "var_len_str",
277
- "mismatched_dim",
278
- "foo_unlimited",
279
- ]
280
- )
319
+ }
281
320
 
282
- assert set(ds.enumtypes) == set(["enum_t"])
321
+ assert set(ds.dimensions) == dimensions
322
+ assert set(ds.variables) == variables
283
323
 
284
- assert set(ds.groups) == set(["subgroup"])
285
- assert ds.parent is None
286
- v = ds.variables["foo"]
287
- assert array_equal(v, np.ones((4, 5)))
288
- assert v.dtype == float
289
- assert v.dimensions == ("x", "y")
290
- assert v.ndim == 2
291
- assert v.ncattrs() == ["units"]
292
- if write_module is not netCDF4:
293
- assert v.getncattr("units") == "meters"
294
- assert tuple(v.chunking()) == (4, 5)
324
+ if ds.data_model == "NETCDF4":
325
+ assert set(ds.enumtypes) == {"enum_t"}
295
326
 
296
- # check for dict items separately
297
- # see https://github.com/h5netcdf/h5netcdf/issues/171
298
- filters = v.filters()
299
- assert filters["complevel"] == 4
300
- assert filters["fletcher32"] is False
301
- assert filters["shuffle"] is True
302
- assert filters["zlib"] is True
327
+ assert set(ds.groups) == {"subgroup"}
328
+ assert ds.parent is None
329
+ v = ds.variables["foo"]
330
+ assert array_equal(v, np.ones((4, 5)))
331
+ assert v.dtype == float
332
+ assert v.dimensions == ("x", "y")
333
+ assert v.ndim == 2
334
+ assert v.ncattrs() == ["units"]
335
+
336
+ if write_module is not netCDF4:
337
+ assert v.getncattr("units") == "meters"
338
+ assert tuple(v.chunking()) == (4, 5)
339
+
340
+ # check for dict items separately
341
+ # see https://github.com/h5netcdf/h5netcdf/issues/171
342
+ filters = v.filters()
343
+ assert filters["complevel"] == 4
344
+ assert filters["fletcher32"] is False
345
+ assert filters["shuffle"] is True
346
+ assert filters["zlib"] is True
303
347
 
304
348
  v = ds.variables["y"]
305
349
  assert array_equal(v, np.r_[np.arange(4), [-1]])
306
- assert v.dtype == int
350
+ assert v.dtype == intf
307
351
  assert v.dimensions == ("y",)
308
352
  assert v.ndim == 1
309
353
  assert v.ncattrs() == ["_FillValue"]
@@ -343,32 +387,33 @@ def read_legacy_netcdf(tmp_netcdf, read_module, write_module):
343
387
 
344
388
  v = ds.variables["intscalar"]
345
389
  assert array_equal(v, np.array(2))
346
- assert v.dtype == "int64"
390
+ assert v.dtype == intf
347
391
  assert v.ndim == 0
348
392
  assert v.dimensions == ()
349
393
  assert v.ncattrs() == []
350
394
 
351
- v = ds.variables["var_len_str"]
352
- assert v.dtype == str
353
- assert v[0] == _vlen_string
395
+ if ds.data_model == "NETCDF4":
396
+ v = ds.variables["var_len_str"]
397
+ assert v.dtype == str
398
+ assert v[0] == _vlen_string
354
399
 
355
- v = ds.groups["subgroup"].variables["subvar"]
356
- assert ds.groups["subgroup"].parent is ds
357
- assert array_equal(v, np.arange(4.0))
358
- assert v.dtype == "int32"
359
- assert v.ndim == 1
360
- assert v.dimensions == ("x",)
361
- assert v.ncattrs() == []
400
+ v = ds.groups["subgroup"].variables["subvar"]
401
+ assert ds.groups["subgroup"].parent is ds
402
+ assert array_equal(v, np.arange(4.0))
403
+ assert v.dtype == "int32"
404
+ assert v.ndim == 1
405
+ assert v.dimensions == ("x",)
406
+ assert v.ncattrs() == []
362
407
 
363
- v = ds.groups["subgroup"].variables["y_var"]
364
- assert v.shape == (10,)
365
- assert "y" in ds.groups["subgroup"].dimensions
408
+ v = ds.groups["subgroup"].variables["y_var"]
409
+ assert v.shape == (10,)
410
+ assert "y" in ds.groups["subgroup"].dimensions
366
411
 
367
- enum_dict = dict(one=1, two=2, three=3, missing=255)
368
- enum_type = ds.enumtypes["enum_t"]
369
- assert enum_type.enum_dict == enum_dict
370
- v = ds.variables["enum_var"]
371
- assert array_equal(v, np.ma.masked_equal([1, 2, 3, 255], 255))
412
+ enum_dict = dict(one=1, two=2, three=3, missing=255)
413
+ enum_type = ds.enumtypes["enum_t"]
414
+ assert enum_type.enum_dict == enum_dict
415
+ v = ds.variables["enum_var"]
416
+ assert array_equal(v, np.ma.masked_equal([1, 2, 3, 255], 255))
372
417
 
373
418
  ds.close()
374
419
 
@@ -376,33 +421,41 @@ def read_legacy_netcdf(tmp_netcdf, read_module, write_module):
376
421
  def read_h5netcdf(tmp_netcdf, write_module, decode_vlen_strings):
377
422
  remote_file = isinstance(tmp_netcdf, str) and tmp_netcdf.startswith(remote_h5)
378
423
  ds = h5netcdf.File(tmp_netcdf, "r", **decode_vlen_strings)
424
+ intf = np.int64 if ds.data_model == "NETCDF4" else np.int32
425
+
379
426
  assert ds.name == "/"
380
427
  assert list(ds.attrs) == ["global", "other_attr"]
381
428
  assert ds.attrs["global"] == 42
382
429
  if write_module is not netCDF4:
383
430
  # skip for now: https://github.com/Unidata/netcdf4-python/issues/388
384
431
  assert ds.attrs["other_attr"] == "yes"
385
- assert set(ds.dimensions) == set(
386
- ["x", "y", "z", "empty", "string3", "mismatched_dim", "unlimited"]
387
- )
388
- variables = set(
389
- [
390
- "enum_var",
391
- "foo",
392
- "z",
393
- "intscalar",
394
- "scalar",
395
- "var_len_str",
396
- "mismatched_dim",
397
- "foo_unlimited",
398
- ]
399
- )
432
+ dimensions = {
433
+ "x",
434
+ "y",
435
+ "z",
436
+ "string3",
437
+ "mismatched_dim",
438
+ "unlimited",
439
+ }
440
+ variables = {
441
+ "foo",
442
+ "z",
443
+ "intscalar",
444
+ "scalar",
445
+ "foo_unlimited",
446
+ "mismatched_dim",
447
+ }
400
448
  # fix current failure of hsds/h5pyd
401
449
  if not remote_file:
402
- variables |= set(["y"])
450
+ variables |= {"y"}
451
+
452
+ if ds.data_model == "NETCDF4":
453
+ dimensions |= {"empty"}
454
+ variables |= {"enum_var", "var_len_str"}
455
+
456
+ assert set(ds.dimensions) == dimensions
403
457
  assert set(ds.variables) == variables
404
458
 
405
- assert set(ds.groups) == set(["subgroup"])
406
459
  assert ds.parent is None
407
460
 
408
461
  v = ds["foo"]
@@ -424,7 +477,7 @@ def read_h5netcdf(tmp_netcdf, write_module, decode_vlen_strings):
424
477
  if not remote_file:
425
478
  v = ds["y"]
426
479
  assert array_equal(v, np.r_[np.arange(4), [-1]])
427
- assert v.dtype == int
480
+ assert v.dtype == intf
428
481
  assert v.dimensions == ("y",)
429
482
  assert v.ndim == 1
430
483
  assert list(v.attrs) == ["_FillValue"]
@@ -458,45 +511,48 @@ def read_h5netcdf(tmp_netcdf, write_module, decode_vlen_strings):
458
511
 
459
512
  v = ds.variables["intscalar"]
460
513
  assert array_equal(v, np.array(2))
461
- assert v.dtype == "int64"
514
+ assert v.dtype == intf
462
515
  assert v.ndim == 0
463
516
  assert v.dimensions == ()
464
517
  assert list(v.attrs) == []
465
518
 
466
- v = ds["var_len_str"]
467
- assert h5py.check_dtype(vlen=v.dtype) is str
468
- if getattr(ds, "decode_vlen_strings", True):
469
- assert v[0] == _vlen_string
470
- else:
471
- assert v[0] == _vlen_string.encode("utf_8")
472
-
473
- v = ds["/subgroup/subvar"]
474
- assert v is ds["subgroup"]["subvar"]
475
- assert v is ds["subgroup/subvar"]
476
- assert v is ds["subgroup"]["/subgroup/subvar"]
477
- assert v.name == "/subgroup/subvar"
478
- assert ds["subgroup"].name == "/subgroup"
479
- assert ds["subgroup"].parent is ds
480
- assert array_equal(v, np.arange(4.0))
481
- assert v.dtype == "int32"
482
- assert v.ndim == 1
483
- assert v.dimensions == ("x",)
484
- assert list(v.attrs) == []
519
+ if ds.data_model == "NETCDF4":
520
+ assert set(ds.groups) == {"subgroup"}
485
521
 
486
- assert ds["/subgroup/y_var"].shape == (10,)
487
- assert ds["/subgroup"].dimensions["y"].size == 10
522
+ v = ds["var_len_str"]
523
+ assert h5py.check_dtype(vlen=v.dtype) is str
524
+ if getattr(ds, "decode_vlen_strings", True):
525
+ assert v[0] == _vlen_string
526
+ else:
527
+ assert v[0] == _vlen_string.encode("utf_8")
528
+
529
+ v = ds["/subgroup/subvar"]
530
+ assert v is ds["subgroup"]["subvar"]
531
+ assert v is ds["subgroup/subvar"]
532
+ assert v is ds["subgroup"]["/subgroup/subvar"]
533
+ assert v.name == "/subgroup/subvar"
534
+ assert ds["subgroup"].name == "/subgroup"
535
+ assert ds["subgroup"].parent is ds
536
+ assert array_equal(v, np.arange(4.0))
537
+ assert v.dtype == "int32"
538
+ assert v.ndim == 1
539
+ assert v.dimensions == ("x",)
540
+ assert list(v.attrs) == []
488
541
 
489
- enum_dict = dict(one=1, two=2, three=3, missing=255)
490
- enum_type = ds.enumtypes["enum_t"]
491
- assert enum_type.enum_dict == enum_dict
492
- v = ds.variables["enum_var"]
493
- assert array_equal(v, np.ma.masked_equal([1, 2, 3, 255], 255))
542
+ assert ds["/subgroup/y_var"].shape == (10,)
543
+ assert ds["/subgroup"].dimensions["y"].size == 10
544
+
545
+ enum_dict = dict(one=1, two=2, three=3, missing=255)
546
+ enum_type = ds.enumtypes["enum_t"]
547
+ assert enum_type.enum_dict == enum_dict
548
+ v = ds.variables["enum_var"]
549
+ assert array_equal(v, np.ma.masked_equal([1, 2, 3, 255], 255))
494
550
 
495
551
  ds.close()
496
552
 
497
553
 
498
- def roundtrip_legacy_netcdf(tmp_netcdf, read_module, write_module):
499
- write_legacy_netcdf(tmp_netcdf, write_module)
554
+ def roundtrip_legacy_netcdf(tmp_netcdf, read_module, write_module, format="NETCDF4"):
555
+ write_legacy_netcdf(tmp_netcdf, write_module, format)
500
556
  read_legacy_netcdf(tmp_netcdf, read_module, write_module)
501
557
 
502
558
 
@@ -517,13 +573,15 @@ def test_write_h5netcdf_read_legacyapi(tmp_local_netcdf):
517
573
  read_legacy_netcdf(tmp_local_netcdf, legacyapi, h5netcdf)
518
574
 
519
575
 
520
- def test_write_h5netcdf_read_netCDF4(tmp_local_netcdf):
521
- write_h5netcdf(tmp_local_netcdf)
576
+ def test_write_h5netcdf_read_netCDF4(tmp_local_netcdf, data_model):
577
+ write_h5netcdf(tmp_local_netcdf, **data_model)
522
578
  read_legacy_netcdf(tmp_local_netcdf, netCDF4, h5netcdf)
523
579
 
524
580
 
525
- def test_roundtrip_h5netcdf(tmp_local_or_remote_netcdf, decode_vlen_strings):
526
- write_h5netcdf(tmp_local_or_remote_netcdf)
581
+ def test_roundtrip_h5netcdf(
582
+ tmp_local_or_remote_netcdf, decode_vlen_strings, data_model
583
+ ):
584
+ write_h5netcdf(tmp_local_or_remote_netcdf, **data_model)
527
585
  read_h5netcdf(tmp_local_or_remote_netcdf, h5netcdf, decode_vlen_strings)
528
586
 
529
587
 
@@ -532,8 +590,8 @@ def test_write_compression_as_zlib(tmp_local_netcdf):
532
590
  read_legacy_netcdf(tmp_local_netcdf, netCDF4, h5netcdf)
533
591
 
534
592
 
535
- def test_write_netCDF4_read_h5netcdf(tmp_local_netcdf, decode_vlen_strings):
536
- write_legacy_netcdf(tmp_local_netcdf, netCDF4)
593
+ def test_write_netCDF4_read_h5netcdf(tmp_local_netcdf, decode_vlen_strings, data_model):
594
+ write_legacy_netcdf(tmp_local_netcdf, netCDF4, **data_model)
537
595
  read_h5netcdf(tmp_local_netcdf, netCDF4, decode_vlen_strings)
538
596
 
539
597
 
@@ -542,6 +600,22 @@ def test_write_legacyapi_read_h5netcdf(tmp_local_netcdf, decode_vlen_strings):
542
600
  read_h5netcdf(tmp_local_netcdf, legacyapi, decode_vlen_strings)
543
601
 
544
602
 
603
+ @pytest.mark.parametrize("strict", [True, False])
604
+ @pytest.mark.parametrize("dataset", [None, "enum_var"])
605
+ @pytest.mark.xfail(reason="Differences between netcdf4/h5netcdf")
606
+ def test_dump_netcdf4_vs_h5netcdf(
607
+ tmp_local_netcdf, dataset, h5dump, data_model, strict
608
+ ):
609
+ """Check that the generated file is identical to netCDF4 by comparing h5dump output."""
610
+ write_legacy_netcdf(tmp_local_netcdf, netCDF4, **data_model)
611
+ expected = h5dump(tmp_local_netcdf, dataset=dataset, strict=strict)
612
+
613
+ write_legacy_netcdf(tmp_local_netcdf, legacyapi, **data_model)
614
+ actual = h5dump(tmp_local_netcdf, dataset=dataset, strict=strict)
615
+
616
+ assert actual == expected
617
+
618
+
545
619
  def test_fileobj(decode_vlen_strings):
546
620
  fileobj = tempfile.TemporaryFile()
547
621
  write_h5netcdf(fileobj)
@@ -845,13 +919,13 @@ def test_hierarchical_access_auto_create(tmp_local_or_remote_netcdf):
845
919
  ds.create_variable("/foo/bar", data=1)
846
920
  g = ds.create_group("foo/baz")
847
921
  g.create_variable("/foo/hello", data=2)
848
- assert set(ds) == set(["foo"])
849
- assert set(ds["foo"]) == set(["bar", "baz", "hello"])
922
+ assert set(ds) == {"foo"}
923
+ assert set(ds["foo"]) == {"bar", "baz", "hello"}
850
924
  ds.close()
851
925
 
852
926
  ds = h5netcdf.File(tmp_local_or_remote_netcdf, "r")
853
- assert set(ds) == set(["foo"])
854
- assert set(ds["foo"]) == set(["bar", "baz", "hello"])
927
+ assert set(ds) == {"foo"}
928
+ assert set(ds["foo"]) == {"bar", "baz", "hello"}
855
929
  ds.close()
856
930
 
857
931
 
@@ -1231,6 +1305,12 @@ def test_reading_unlimited_dimensions_created_with_c_api(tmp_local_netcdf):
1231
1305
  f.groups["test"]["dummy4"].shape == (0, 0)
1232
1306
  assert f["dummy5"].shape == (2, 3)
1233
1307
 
1308
+ # regression test for https://github.com/pydata/xarray/issues/10818
1309
+ # h5netcdf issue https://github.com/h5netcdf/h5netcdf/issues/287
1310
+ # slicing a variable with a slice exceeding its shape
1311
+ # should return only up to shape-size
1312
+ assert f["dummy1"][:10, :2].shape == (2, 2)
1313
+
1234
1314
 
1235
1315
  def test_reading_unused_unlimited_dimension(tmp_local_or_remote_netcdf):
1236
1316
  """Test reading a file with unused dimension of unlimited size"""
@@ -2274,9 +2354,7 @@ def test_vlen_string_dataset_fillvalue(tmp_local_netcdf, decode_vlen_strings):
2274
2354
  "ros3" not in h5py.registered_drivers(), reason="ros3 not available"
2275
2355
  )
2276
2356
  def test_ros3():
2277
- fname = (
2278
- "https://www.unidata.ucar.edu/software/netcdf/examples/OMI-Aura_L2-example.nc"
2279
- )
2357
+ fname = "https://archive.unidata.ucar.edu/software/netcdf/examples/OMI-Aura_L2-example.nc"
2280
2358
  f = h5netcdf.File(fname, "r", driver="ros3")
2281
2359
  assert "Temperature" in list(f)
2282
2360
  f.close()
@@ -2817,3 +2895,107 @@ def test_h5pyd_append(hsds_up):
2817
2895
 
2818
2896
  with h5netcdf.File(fname, "a", driver="h5pyd") as ds:
2819
2897
  assert ds._preexisting_file
2898
+
2899
+
2900
+ def test_raise_on_closed_file(tmp_local_netcdf):
2901
+ f = h5netcdf.File(tmp_local_netcdf, "w")
2902
+ f.dimensions = {"x": 5}
2903
+ v = f.create_variable("hello", ("x",), float)
2904
+ v[:] = np.ones(5)
2905
+ f.close()
2906
+ with pytest.raises(
2907
+ ValueError,
2908
+ match=f"I/O operation on <Closed h5netcdf.File>: '{tmp_local_netcdf}'",
2909
+ ):
2910
+ print(v[:])
2911
+
2912
+
2913
+ def write_legacy_string_array(tmp_netcdf, write_module, format):
2914
+ ds = write_module.Dataset(tmp_netcdf, mode="w", format=format)
2915
+
2916
+ # we do not handle "_Encoding"
2917
+ if write_module == netCDF4:
2918
+ ds.set_auto_chartostring(False)
2919
+
2920
+ data = np.array(
2921
+ [
2922
+ [b"apple ", b"berry ", b"cherry", b"dates ", b"elder "],
2923
+ [b"fig ", b"grape ", b"honey ", b"iris ", b"jelly "],
2924
+ [b"kiwi ", b"lemon ", b"mango ", b"nectar", b"olive "],
2925
+ [b"peach ", b"quince", b"raisin", b"salak ", b"tomat "],
2926
+ ],
2927
+ dtype="S6",
2928
+ )
2929
+
2930
+ data = string_to_char(data)
2931
+
2932
+ ds.createDimension("n1", None)
2933
+ ds.createDimension("n2", 5)
2934
+ ds.createDimension("nchar", 6)
2935
+
2936
+ v = ds.createVariable("strings", "S1", ("n1", "n2", "nchar"))
2937
+
2938
+ # netCDF4 can't resize with incomplete slices and unfitting dimensions
2939
+ if write_module == netCDF4:
2940
+ v[...] = data
2941
+
2942
+ v[:-1] = data[:-1]
2943
+ v[-1] = data[-1]
2944
+ v[-1, -1] = data[-1, -1]
2945
+ ds.close()
2946
+
2947
+
2948
+ @pytest.mark.parametrize("strict", [True, False])
2949
+ @pytest.mark.xfail(reason="Differences between netcdf4/h5netcdf")
2950
+ def test_dump_string_array(tmp_local_netcdf, h5dump, data_model, strict):
2951
+ write_legacy_string_array(tmp_local_netcdf, netCDF4, **data_model)
2952
+ expected = h5dump(tmp_local_netcdf, strict=strict)
2953
+
2954
+ write_legacy_string_array(tmp_local_netcdf, legacyapi, **data_model)
2955
+ actual = h5dump(tmp_local_netcdf, strict=strict)
2956
+
2957
+ assert actual == expected
2958
+
2959
+
2960
+ def maybe_resize_with_broadcasting(tmp_netcdf, write_module, data_model):
2961
+ ds = write_module.Dataset(tmp_netcdf, mode="w", **data_model)
2962
+ n1, n2, n3 = 4, 5, 6
2963
+ data = np.arange(n1 * n2 * n3).reshape((n1, n2, n3))
2964
+
2965
+ ds.createDimension("n1", None)
2966
+ ds.createDimension("n2", n2)
2967
+ ds.createDimension("n3", n3)
2968
+
2969
+ v = ds.createVariable("numbers", "i4", ("n1", "n2", "n3"))
2970
+ # netcdf4-python does not handle this, need to write once with full data
2971
+ if write_module == netCDF4:
2972
+ v[:] = data
2973
+ v[:-1] = data[:-1]
2974
+ v[-1] = data[-1]
2975
+ v[-1, -1] = data[-1, -1]
2976
+ ds.close()
2977
+
2978
+
2979
+ @pytest.mark.parametrize("dataset", [None, "numbers"])
2980
+ @pytest.mark.parametrize("strict", [True, False])
2981
+ @pytest.mark.xfail(reason="Differences between netcdf4/h5netcdf")
2982
+ def test_dump_maybe_resize_with_broadcasting(
2983
+ tmp_local_netcdf, data_model, h5dump, dataset, strict
2984
+ ):
2985
+ maybe_resize_with_broadcasting(tmp_local_netcdf, netCDF4, data_model)
2986
+ expected = h5dump(tmp_local_netcdf, strict=strict, dataset=dataset)
2987
+
2988
+ maybe_resize_with_broadcasting(tmp_local_netcdf, legacyapi, data_model)
2989
+ actual = h5dump(tmp_local_netcdf, strict=strict, dataset=dataset)
2990
+
2991
+ assert actual == expected
2992
+
2993
+
2994
+ def test_is_classic(tmp_local_netcdf):
2995
+ """Check that the generated file is recognized as netCDF-4 classic model by ncdump."""
2996
+ import subprocess
2997
+
2998
+ write_h5netcdf(tmp_local_netcdf, format="NETCDF4_CLASSIC")
2999
+
3000
+ out = subprocess.run(["ncdump", "-k", tmp_local_netcdf], capture_output=True)
3001
+ assert out.stdout.decode().strip() == "netCDF-4 classic model"