h5netcdf 1.6.4__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of h5netcdf might be problematic. Click here for more details.

@@ -133,22 +133,27 @@ def is_h5py_char_working(tmp_netcdf, name):
133
133
  raise
134
134
 
135
135
 
136
- def write_legacy_netcdf(tmp_netcdf, write_module):
137
- ds = write_module.Dataset(tmp_netcdf, "w")
138
- ds.setncattr("global", 42)
136
+ def write_legacy_netcdf(tmp_netcdf, write_module, format="NETCDF4"):
137
+ ds = write_module.Dataset(tmp_netcdf, mode="w", format=format)
138
+ intf = np.int64 if ds.data_model == "NETCDF4" else np.int32
139
+
140
+ ds.setncattr("global", intf(42))
139
141
  ds.other_attr = "yes"
140
142
  ds.createDimension("x", 4)
141
143
  ds.createDimension("y", 5)
142
144
  ds.createDimension("z", 6)
143
- ds.createDimension("empty", 0)
144
145
  ds.createDimension("string3", 3)
145
146
  ds.createDimension("unlimited", None)
146
147
 
148
+ if ds.data_model == "NETCDF4":
149
+ # In the CLASSIC format, only one unlimited dimension is allowed
150
+ ds.createDimension("empty", 0)
151
+
147
152
  v = ds.createVariable("foo", float, ("x", "y"), chunksizes=(4, 5), zlib=True)
148
153
  v[...] = 1
149
154
  v.setncattr("units", "meters")
150
155
 
151
- v = ds.createVariable("y", int, ("y",), fill_value=-1)
156
+ v = ds.createVariable("y", intf, ("y",), fill_value=intf(-1))
152
157
  v[:4] = np.arange(4)
153
158
 
154
159
  v = ds.createVariable("z", "S1", ("z", "string3"), fill_value=b"X")
@@ -158,7 +163,7 @@ def write_legacy_netcdf(tmp_netcdf, write_module):
158
163
  v[...] = 2.0
159
164
 
160
165
  # test creating a scalar with compression option (which should be ignored)
161
- v = ds.createVariable("intscalar", np.int64, (), zlib=6, fill_value=None)
166
+ v = ds.createVariable("intscalar", intf, (), zlib=6, fill_value=None)
162
167
  v[...] = 2
163
168
 
164
169
  v = ds.createVariable("foo_unlimited", float, ("x", "unlimited"))
@@ -170,37 +175,60 @@ def write_legacy_netcdf(tmp_netcdf, write_module):
170
175
  ):
171
176
  ds.createVariable("boolean", np.bool_, ("x"))
172
177
 
173
- g = ds.createGroup("subgroup")
174
- v = g.createVariable("subvar", np.int32, ("x",))
175
- v[...] = np.arange(4.0)
178
+ ds.createDimension("mismatched_dim", 1)
179
+ ds.createVariable("mismatched_dim", intf, ())
176
180
 
177
- g.createDimension("y", 10)
178
- g.createVariable("y_var", float, ("y",))
181
+ if ds.data_model == "NETCDF4":
182
+ g = ds.createGroup("subgroup")
183
+ v = g.createVariable("subvar", np.int32, ("x",))
184
+ v[...] = np.arange(4.0)
179
185
 
180
- ds.createDimension("mismatched_dim", 1)
181
- ds.createVariable("mismatched_dim", int, ())
186
+ g.createDimension("y", 10)
187
+ g.createVariable("y_var", float, ("y",))
182
188
 
183
- v = ds.createVariable("var_len_str", str, ("x"))
184
- v[0] = "foo"
189
+ v = ds.createVariable("var_len_str", str, ("x"))
190
+ v[0] = "foo"
185
191
 
186
- enum_dict = dict(one=1, two=2, three=3, missing=255)
187
- enum_type = ds.createEnumType(np.uint8, "enum_t", enum_dict)
188
- v = ds.createVariable(
189
- "enum_var",
190
- enum_type,
191
- ("x",),
192
- fill_value=enum_dict["missing"],
193
- )
194
- v[0:3] = [1, 2, 3]
192
+ enum_dict = dict(one=1, two=2, three=3, missing=255)
193
+ enum_type = ds.createEnumType(np.uint8, "enum_t", enum_dict)
194
+ v = ds.createVariable(
195
+ "enum_var",
196
+ enum_type,
197
+ ("x",),
198
+ fill_value=enum_dict["missing"],
199
+ )
200
+ v[0:3] = [1, 2, 3]
195
201
 
196
202
  ds.close()
197
203
 
198
204
 
199
- def write_h5netcdf(tmp_netcdf, compression="gzip"):
200
- ds = h5netcdf.File(tmp_netcdf, "w")
201
- ds.attrs["global"] = 42
205
+ def write_h5netcdf(tmp_netcdf, compression="gzip", format="NETCDF4"):
206
+ ds = h5netcdf.File(tmp_netcdf, mode="w", format=format)
207
+ intf = np.int64 if ds.data_model == "NETCDF4" else np.int32
208
+ ds.attrs["global"] = intf(42)
202
209
  ds.attrs["other_attr"] = "yes"
203
- ds.dimensions = {"x": 4, "y": 5, "z": 6, "empty": 0, "unlimited": None}
210
+
211
+ if ds.data_model == "NETCDF4_CLASSIC":
212
+ with raises(
213
+ CompatibilityError,
214
+ match="NETCDF4_CLASSIC format only allows one unlimited dimension.",
215
+ ):
216
+ ds.dimensions = {"x": 4, "y": 5, "z": 6, "unlimited": None, "empty": 0}
217
+
218
+ ds.dimensions = {"x": 4, "y": 5, "z": 6, "unlimited": None}
219
+
220
+ if ds.data_model == "NETCDF4":
221
+ ds.dimensions["empty"] = 0
222
+
223
+ if ds.data_model == "NETCDF4_CLASSIC":
224
+ with raises(
225
+ CompatibilityError,
226
+ match="Only one unlimited dimension allowed in the NETCDF4_CLASSIC format.",
227
+ ):
228
+ ds.dimensions["empty"] = 0
229
+
230
+ with raises(CompatibilityError, match=r"int64 \(CLASSIC\) dtypes"):
231
+ ds.attrs["int64_attr"] = 42
204
232
 
205
233
  v = ds.create_variable(
206
234
  "foo", ("x", "y"), float, chunks=(4, 5), compression=compression, shuffle=True
@@ -210,14 +238,14 @@ def write_h5netcdf(tmp_netcdf, compression="gzip"):
210
238
 
211
239
  remote_file = isinstance(tmp_netcdf, str) and tmp_netcdf.startswith(remote_h5)
212
240
  if not remote_file:
213
- v = ds.create_variable("y", ("y",), int, fillvalue=-1)
241
+ v = ds.create_variable("y", ("y",), intf, fillvalue=intf(-1))
214
242
  v[:4] = np.arange(4)
215
243
 
216
244
  v = ds.create_variable("z", ("z", "string3"), data=_char_array, fillvalue=b"X")
217
245
 
218
246
  v = ds.create_variable("scalar", data=np.float32(2.0))
219
247
 
220
- v = ds.create_variable("intscalar", data=np.int64(2))
248
+ v = ds.create_variable("intscalar", data=intf(2))
221
249
 
222
250
  v = ds.create_variable("foo_unlimited", ("x", "unlimited"), float)
223
251
  v[...] = 1
@@ -225,36 +253,40 @@ def write_h5netcdf(tmp_netcdf, compression="gzip"):
225
253
  with raises((h5netcdf.CompatibilityError, TypeError)):
226
254
  ds.create_variable("boolean", data=True)
227
255
 
228
- g = ds.create_group("subgroup")
229
- v = g.create_variable("subvar", ("x",), np.int32)
230
- v[...] = np.arange(4.0)
231
- with raises(AttributeError):
232
- v.attrs["_Netcdf4Dimid"] = -1
233
-
234
- g.dimensions["y"] = 10
235
- g.create_variable("y_var", ("y",), float)
236
- g.flush()
237
-
238
256
  ds.dimensions["mismatched_dim"] = 1
239
- ds.create_variable("mismatched_dim", dtype=int)
257
+ ds.create_variable("mismatched_dim", dtype=intf)
240
258
  ds.flush()
241
259
 
242
- dt = h5py.special_dtype(vlen=str)
243
- v = ds.create_variable("var_len_str", ("x",), dtype=dt)
244
- v[0] = _vlen_string
260
+ if ds.data_model == "NETCDF4":
261
+ g = ds.create_group("subgroup")
262
+ v = g.create_variable("subvar", ("x",), np.int32)
263
+ v[...] = np.arange(4.0)
264
+ with raises(AttributeError):
265
+ v.attrs["_Netcdf4Dimid"] = -1
245
266
 
246
- enum_dict = dict(one=1, two=2, three=3, missing=255)
247
- enum_type = ds.create_enumtype(np.uint8, "enum_t", enum_dict)
248
- v = ds.create_variable(
249
- "enum_var", ("x",), dtype=enum_type, fillvalue=enum_dict["missing"]
250
- )
251
- v[0:3] = [1, 2, 3]
267
+ g.dimensions["y"] = 10
268
+ g.create_variable("y_var", ("y",), float)
269
+ g.flush()
270
+
271
+ dt = h5py.special_dtype(vlen=str)
272
+ v = ds.create_variable("var_len_str", ("x",), dtype=dt)
273
+ v[0] = _vlen_string
274
+
275
+ enum_dict = dict(one=1, two=2, three=3, missing=255)
276
+ enum_type = ds.create_enumtype(np.uint8, "enum_t", enum_dict)
277
+ v = ds.create_variable(
278
+ "enum_var", ("x",), dtype=enum_type, fillvalue=enum_dict["missing"]
279
+ )
280
+ v[0:3] = [1, 2, 3]
252
281
 
253
282
  ds.close()
254
283
 
255
284
 
256
285
  def read_legacy_netcdf(tmp_netcdf, read_module, write_module):
257
286
  ds = read_module.Dataset(tmp_netcdf, "r")
287
+
288
+ intf = np.int64 if ds.data_model == "NETCDF4" else np.int32
289
+
258
290
  assert ds.ncattrs() == ["global", "other_attr"]
259
291
  assert ds.getncattr("global") == 42
260
292
  if write_module is not netCDF4:
@@ -262,52 +294,60 @@ def read_legacy_netcdf(tmp_netcdf, read_module, write_module):
262
294
  assert ds.other_attr == "yes"
263
295
  with raises(AttributeError, match="not found"):
264
296
  ds.does_not_exist
265
- assert set(ds.dimensions) == {
297
+ dimensions = {
266
298
  "x",
267
299
  "y",
268
300
  "z",
269
- "empty",
270
301
  "string3",
271
302
  "mismatched_dim",
272
303
  "unlimited",
273
304
  }
274
- assert set(ds.variables) == {
275
- "enum_var",
305
+ variables = {
276
306
  "foo",
277
307
  "y",
278
308
  "z",
279
309
  "intscalar",
280
310
  "scalar",
281
- "var_len_str",
282
- "mismatched_dim",
283
311
  "foo_unlimited",
312
+ "mismatched_dim",
284
313
  }
314
+ if ds.data_model == "NETCDF4":
315
+ dimensions |= {"empty"}
316
+ variables |= {
317
+ "enum_var",
318
+ "var_len_str",
319
+ }
285
320
 
286
- assert set(ds.enumtypes) == {"enum_t"}
321
+ assert set(ds.dimensions) == dimensions
322
+ assert set(ds.variables) == variables
287
323
 
288
- assert set(ds.groups) == {"subgroup"}
289
- assert ds.parent is None
290
- v = ds.variables["foo"]
291
- assert array_equal(v, np.ones((4, 5)))
292
- assert v.dtype == float
293
- assert v.dimensions == ("x", "y")
294
- assert v.ndim == 2
295
- assert v.ncattrs() == ["units"]
296
- if write_module is not netCDF4:
297
- assert v.getncattr("units") == "meters"
298
- assert tuple(v.chunking()) == (4, 5)
324
+ if ds.data_model == "NETCDF4":
325
+ assert set(ds.enumtypes) == {"enum_t"}
299
326
 
300
- # check for dict items separately
301
- # see https://github.com/h5netcdf/h5netcdf/issues/171
302
- filters = v.filters()
303
- assert filters["complevel"] == 4
304
- assert filters["fletcher32"] is False
305
- assert filters["shuffle"] is True
306
- assert filters["zlib"] is True
327
+ assert set(ds.groups) == {"subgroup"}
328
+ assert ds.parent is None
329
+ v = ds.variables["foo"]
330
+ assert array_equal(v, np.ones((4, 5)))
331
+ assert v.dtype == float
332
+ assert v.dimensions == ("x", "y")
333
+ assert v.ndim == 2
334
+ assert v.ncattrs() == ["units"]
335
+
336
+ if write_module is not netCDF4:
337
+ assert v.getncattr("units") == "meters"
338
+ assert tuple(v.chunking()) == (4, 5)
339
+
340
+ # check for dict items separately
341
+ # see https://github.com/h5netcdf/h5netcdf/issues/171
342
+ filters = v.filters()
343
+ assert filters["complevel"] == 4
344
+ assert filters["fletcher32"] is False
345
+ assert filters["shuffle"] is True
346
+ assert filters["zlib"] is True
307
347
 
308
348
  v = ds.variables["y"]
309
349
  assert array_equal(v, np.r_[np.arange(4), [-1]])
310
- assert v.dtype == int
350
+ assert v.dtype == intf
311
351
  assert v.dimensions == ("y",)
312
352
  assert v.ndim == 1
313
353
  assert v.ncattrs() == ["_FillValue"]
@@ -347,32 +387,33 @@ def read_legacy_netcdf(tmp_netcdf, read_module, write_module):
347
387
 
348
388
  v = ds.variables["intscalar"]
349
389
  assert array_equal(v, np.array(2))
350
- assert v.dtype == "int64"
390
+ assert v.dtype == intf
351
391
  assert v.ndim == 0
352
392
  assert v.dimensions == ()
353
393
  assert v.ncattrs() == []
354
394
 
355
- v = ds.variables["var_len_str"]
356
- assert v.dtype == str
357
- assert v[0] == _vlen_string
395
+ if ds.data_model == "NETCDF4":
396
+ v = ds.variables["var_len_str"]
397
+ assert v.dtype == str
398
+ assert v[0] == _vlen_string
358
399
 
359
- v = ds.groups["subgroup"].variables["subvar"]
360
- assert ds.groups["subgroup"].parent is ds
361
- assert array_equal(v, np.arange(4.0))
362
- assert v.dtype == "int32"
363
- assert v.ndim == 1
364
- assert v.dimensions == ("x",)
365
- assert v.ncattrs() == []
400
+ v = ds.groups["subgroup"].variables["subvar"]
401
+ assert ds.groups["subgroup"].parent is ds
402
+ assert array_equal(v, np.arange(4.0))
403
+ assert v.dtype == "int32"
404
+ assert v.ndim == 1
405
+ assert v.dimensions == ("x",)
406
+ assert v.ncattrs() == []
366
407
 
367
- v = ds.groups["subgroup"].variables["y_var"]
368
- assert v.shape == (10,)
369
- assert "y" in ds.groups["subgroup"].dimensions
408
+ v = ds.groups["subgroup"].variables["y_var"]
409
+ assert v.shape == (10,)
410
+ assert "y" in ds.groups["subgroup"].dimensions
370
411
 
371
- enum_dict = dict(one=1, two=2, three=3, missing=255)
372
- enum_type = ds.enumtypes["enum_t"]
373
- assert enum_type.enum_dict == enum_dict
374
- v = ds.variables["enum_var"]
375
- assert array_equal(v, np.ma.masked_equal([1, 2, 3, 255], 255))
412
+ enum_dict = dict(one=1, two=2, three=3, missing=255)
413
+ enum_type = ds.enumtypes["enum_t"]
414
+ assert enum_type.enum_dict == enum_dict
415
+ v = ds.variables["enum_var"]
416
+ assert array_equal(v, np.ma.masked_equal([1, 2, 3, 255], 255))
376
417
 
377
418
  ds.close()
378
419
 
@@ -380,37 +421,41 @@ def read_legacy_netcdf(tmp_netcdf, read_module, write_module):
380
421
  def read_h5netcdf(tmp_netcdf, write_module, decode_vlen_strings):
381
422
  remote_file = isinstance(tmp_netcdf, str) and tmp_netcdf.startswith(remote_h5)
382
423
  ds = h5netcdf.File(tmp_netcdf, "r", **decode_vlen_strings)
424
+ intf = np.int64 if ds.data_model == "NETCDF4" else np.int32
425
+
383
426
  assert ds.name == "/"
384
427
  assert list(ds.attrs) == ["global", "other_attr"]
385
428
  assert ds.attrs["global"] == 42
386
429
  if write_module is not netCDF4:
387
430
  # skip for now: https://github.com/Unidata/netcdf4-python/issues/388
388
431
  assert ds.attrs["other_attr"] == "yes"
389
- assert set(ds.dimensions) == {
432
+ dimensions = {
390
433
  "x",
391
434
  "y",
392
435
  "z",
393
- "empty",
394
436
  "string3",
395
437
  "mismatched_dim",
396
438
  "unlimited",
397
439
  }
398
440
  variables = {
399
- "enum_var",
400
441
  "foo",
401
442
  "z",
402
443
  "intscalar",
403
444
  "scalar",
404
- "var_len_str",
405
- "mismatched_dim",
406
445
  "foo_unlimited",
446
+ "mismatched_dim",
407
447
  }
408
448
  # fix current failure of hsds/h5pyd
409
449
  if not remote_file:
410
450
  variables |= {"y"}
451
+
452
+ if ds.data_model == "NETCDF4":
453
+ dimensions |= {"empty"}
454
+ variables |= {"enum_var", "var_len_str"}
455
+
456
+ assert set(ds.dimensions) == dimensions
411
457
  assert set(ds.variables) == variables
412
458
 
413
- assert set(ds.groups) == {"subgroup"}
414
459
  assert ds.parent is None
415
460
 
416
461
  v = ds["foo"]
@@ -432,7 +477,7 @@ def read_h5netcdf(tmp_netcdf, write_module, decode_vlen_strings):
432
477
  if not remote_file:
433
478
  v = ds["y"]
434
479
  assert array_equal(v, np.r_[np.arange(4), [-1]])
435
- assert v.dtype == int
480
+ assert v.dtype == intf
436
481
  assert v.dimensions == ("y",)
437
482
  assert v.ndim == 1
438
483
  assert list(v.attrs) == ["_FillValue"]
@@ -466,45 +511,48 @@ def read_h5netcdf(tmp_netcdf, write_module, decode_vlen_strings):
466
511
 
467
512
  v = ds.variables["intscalar"]
468
513
  assert array_equal(v, np.array(2))
469
- assert v.dtype == "int64"
514
+ assert v.dtype == intf
470
515
  assert v.ndim == 0
471
516
  assert v.dimensions == ()
472
517
  assert list(v.attrs) == []
473
518
 
474
- v = ds["var_len_str"]
475
- assert h5py.check_dtype(vlen=v.dtype) is str
476
- if getattr(ds, "decode_vlen_strings", True):
477
- assert v[0] == _vlen_string
478
- else:
479
- assert v[0] == _vlen_string.encode("utf_8")
480
-
481
- v = ds["/subgroup/subvar"]
482
- assert v is ds["subgroup"]["subvar"]
483
- assert v is ds["subgroup/subvar"]
484
- assert v is ds["subgroup"]["/subgroup/subvar"]
485
- assert v.name == "/subgroup/subvar"
486
- assert ds["subgroup"].name == "/subgroup"
487
- assert ds["subgroup"].parent is ds
488
- assert array_equal(v, np.arange(4.0))
489
- assert v.dtype == "int32"
490
- assert v.ndim == 1
491
- assert v.dimensions == ("x",)
492
- assert list(v.attrs) == []
519
+ if ds.data_model == "NETCDF4":
520
+ assert set(ds.groups) == {"subgroup"}
493
521
 
494
- assert ds["/subgroup/y_var"].shape == (10,)
495
- assert ds["/subgroup"].dimensions["y"].size == 10
522
+ v = ds["var_len_str"]
523
+ assert h5py.check_dtype(vlen=v.dtype) is str
524
+ if getattr(ds, "decode_vlen_strings", True):
525
+ assert v[0] == _vlen_string
526
+ else:
527
+ assert v[0] == _vlen_string.encode("utf_8")
528
+
529
+ v = ds["/subgroup/subvar"]
530
+ assert v is ds["subgroup"]["subvar"]
531
+ assert v is ds["subgroup/subvar"]
532
+ assert v is ds["subgroup"]["/subgroup/subvar"]
533
+ assert v.name == "/subgroup/subvar"
534
+ assert ds["subgroup"].name == "/subgroup"
535
+ assert ds["subgroup"].parent is ds
536
+ assert array_equal(v, np.arange(4.0))
537
+ assert v.dtype == "int32"
538
+ assert v.ndim == 1
539
+ assert v.dimensions == ("x",)
540
+ assert list(v.attrs) == []
496
541
 
497
- enum_dict = dict(one=1, two=2, three=3, missing=255)
498
- enum_type = ds.enumtypes["enum_t"]
499
- assert enum_type.enum_dict == enum_dict
500
- v = ds.variables["enum_var"]
501
- assert array_equal(v, np.ma.masked_equal([1, 2, 3, 255], 255))
542
+ assert ds["/subgroup/y_var"].shape == (10,)
543
+ assert ds["/subgroup"].dimensions["y"].size == 10
544
+
545
+ enum_dict = dict(one=1, two=2, three=3, missing=255)
546
+ enum_type = ds.enumtypes["enum_t"]
547
+ assert enum_type.enum_dict == enum_dict
548
+ v = ds.variables["enum_var"]
549
+ assert array_equal(v, np.ma.masked_equal([1, 2, 3, 255], 255))
502
550
 
503
551
  ds.close()
504
552
 
505
553
 
506
- def roundtrip_legacy_netcdf(tmp_netcdf, read_module, write_module):
507
- write_legacy_netcdf(tmp_netcdf, write_module)
554
+ def roundtrip_legacy_netcdf(tmp_netcdf, read_module, write_module, format="NETCDF4"):
555
+ write_legacy_netcdf(tmp_netcdf, write_module, format)
508
556
  read_legacy_netcdf(tmp_netcdf, read_module, write_module)
509
557
 
510
558
 
@@ -525,13 +573,15 @@ def test_write_h5netcdf_read_legacyapi(tmp_local_netcdf):
525
573
  read_legacy_netcdf(tmp_local_netcdf, legacyapi, h5netcdf)
526
574
 
527
575
 
528
- def test_write_h5netcdf_read_netCDF4(tmp_local_netcdf):
529
- write_h5netcdf(tmp_local_netcdf)
576
+ def test_write_h5netcdf_read_netCDF4(tmp_local_netcdf, data_model):
577
+ write_h5netcdf(tmp_local_netcdf, **data_model)
530
578
  read_legacy_netcdf(tmp_local_netcdf, netCDF4, h5netcdf)
531
579
 
532
580
 
533
- def test_roundtrip_h5netcdf(tmp_local_or_remote_netcdf, decode_vlen_strings):
534
- write_h5netcdf(tmp_local_or_remote_netcdf)
581
+ def test_roundtrip_h5netcdf(
582
+ tmp_local_or_remote_netcdf, decode_vlen_strings, data_model
583
+ ):
584
+ write_h5netcdf(tmp_local_or_remote_netcdf, **data_model)
535
585
  read_h5netcdf(tmp_local_or_remote_netcdf, h5netcdf, decode_vlen_strings)
536
586
 
537
587
 
@@ -540,8 +590,8 @@ def test_write_compression_as_zlib(tmp_local_netcdf):
540
590
  read_legacy_netcdf(tmp_local_netcdf, netCDF4, h5netcdf)
541
591
 
542
592
 
543
- def test_write_netCDF4_read_h5netcdf(tmp_local_netcdf, decode_vlen_strings):
544
- write_legacy_netcdf(tmp_local_netcdf, netCDF4)
593
+ def test_write_netCDF4_read_h5netcdf(tmp_local_netcdf, decode_vlen_strings, data_model):
594
+ write_legacy_netcdf(tmp_local_netcdf, netCDF4, **data_model)
545
595
  read_h5netcdf(tmp_local_netcdf, netCDF4, decode_vlen_strings)
546
596
 
547
597
 
@@ -550,6 +600,22 @@ def test_write_legacyapi_read_h5netcdf(tmp_local_netcdf, decode_vlen_strings):
550
600
  read_h5netcdf(tmp_local_netcdf, legacyapi, decode_vlen_strings)
551
601
 
552
602
 
603
+ @pytest.mark.parametrize("strict", [True, False])
604
+ @pytest.mark.parametrize("dataset", [None, "enum_var"])
605
+ @pytest.mark.xfail(reason="Differences between netcdf4/h5netcdf")
606
+ def test_dump_netcdf4_vs_h5netcdf(
607
+ tmp_local_netcdf, dataset, h5dump, data_model, strict
608
+ ):
609
+ """Check that the generated file is identical to netCDF4 by comparing h5dump output."""
610
+ write_legacy_netcdf(tmp_local_netcdf, netCDF4, **data_model)
611
+ expected = h5dump(tmp_local_netcdf, dataset=dataset, strict=strict)
612
+
613
+ write_legacy_netcdf(tmp_local_netcdf, legacyapi, **data_model)
614
+ actual = h5dump(tmp_local_netcdf, dataset=dataset, strict=strict)
615
+
616
+ assert actual == expected
617
+
618
+
553
619
  def test_fileobj(decode_vlen_strings):
554
620
  fileobj = tempfile.TemporaryFile()
555
621
  write_h5netcdf(fileobj)
@@ -1239,6 +1305,12 @@ def test_reading_unlimited_dimensions_created_with_c_api(tmp_local_netcdf):
1239
1305
  f.groups["test"]["dummy4"].shape == (0, 0)
1240
1306
  assert f["dummy5"].shape == (2, 3)
1241
1307
 
1308
+ # regression test for https://github.com/pydata/xarray/issues/10818
1309
+ # h5netcdf issue https://github.com/h5netcdf/h5netcdf/issues/287
1310
+ # slicing a variable with slice exceeding its shape
1311
+ # should return only up to shape-size
1312
+ assert f["dummy1"][:10, :2].shape == (2, 2)
1313
+
1242
1314
 
1243
1315
  def test_reading_unused_unlimited_dimension(tmp_local_or_remote_netcdf):
1244
1316
  """Test reading a file with unused dimension of unlimited size"""
@@ -1774,10 +1846,10 @@ def test_track_order_specification(tmp_local_netcdf):
1774
1846
  # This should always work with the default file opening settings
1775
1847
  # https://github.com/h5netcdf/h5netcdf/issues/136#issuecomment-1017457067
1776
1848
  def test_more_than_7_attr_creation(tmp_local_netcdf):
1777
- with h5netcdf.File(tmp_local_netcdf, "w") as _h5file:
1849
+ with h5netcdf.File(tmp_local_netcdf, "w") as h5file:
1778
1850
  for i in range(100):
1779
- _h5file.attrs[f"key{i}"] = i
1780
- _h5file.attrs[f"key{i}"] = 0
1851
+ h5file.attrs[f"key{i}"] = i
1852
+ h5file.attrs[f"key{i}"] = 0
1781
1853
 
1782
1854
 
1783
1855
  # Add a test that is supposed to fail in relation to issue #136
@@ -1786,10 +1858,10 @@ def test_more_than_7_attr_creation(tmp_local_netcdf):
1786
1858
  # https://github.com/h5netcdf/h5netcdf/issues/136#issuecomment-1017457067
1787
1859
  @pytest.mark.parametrize("track_order", [False, True])
1788
1860
  def test_more_than_7_attr_creation_track_order(tmp_local_netcdf, track_order):
1789
- with h5netcdf.File(tmp_local_netcdf, "w", track_order=track_order) as _h5file:
1861
+ with h5netcdf.File(tmp_local_netcdf, "w", track_order=track_order) as h5file:
1790
1862
  for i in range(100):
1791
- _h5file.attrs[f"key{i}"] = i
1792
- _h5file.attrs[f"key{i}"] = 0
1863
+ h5file.attrs[f"key{i}"] = i
1864
+ h5file.attrs[f"key{i}"] = 0
1793
1865
 
1794
1866
 
1795
1867
  def test_group_names(tmp_local_netcdf):
@@ -2282,9 +2354,7 @@ def test_vlen_string_dataset_fillvalue(tmp_local_netcdf, decode_vlen_strings):
2282
2354
  "ros3" not in h5py.registered_drivers(), reason="ros3 not available"
2283
2355
  )
2284
2356
  def test_ros3():
2285
- fname = (
2286
- "https://www.unidata.ucar.edu/software/netcdf/examples/OMI-Aura_L2-example.nc"
2287
- )
2357
+ fname = "https://archive.unidata.ucar.edu/software/netcdf/examples/OMI-Aura_L2-example.nc"
2288
2358
  f = h5netcdf.File(fname, "r", driver="ros3")
2289
2359
  assert "Temperature" in list(f)
2290
2360
  f.close()
@@ -2838,3 +2908,94 @@ def test_raise_on_closed_file(tmp_local_netcdf):
2838
2908
  match=f"I/O operation on <Closed h5netcdf.File>: '{tmp_local_netcdf}'",
2839
2909
  ):
2840
2910
  print(v[:])
2911
+
2912
+
2913
+ def write_legacy_string_array(tmp_netcdf, write_module, format):
2914
+ ds = write_module.Dataset(tmp_netcdf, mode="w", format=format)
2915
+
2916
+ # we do not handle "_Encoding"
2917
+ if write_module == netCDF4:
2918
+ ds.set_auto_chartostring(False)
2919
+
2920
+ data = np.array(
2921
+ [
2922
+ [b"apple ", b"berry ", b"cherry", b"dates ", b"elder "],
2923
+ [b"fig ", b"grape ", b"honey ", b"iris ", b"jelly "],
2924
+ [b"kiwi ", b"lemon ", b"mango ", b"nectar", b"olive "],
2925
+ [b"peach ", b"quince", b"raisin", b"salak ", b"tomat "],
2926
+ ],
2927
+ dtype="S6",
2928
+ )
2929
+
2930
+ data = string_to_char(data)
2931
+
2932
+ ds.createDimension("n1", None)
2933
+ ds.createDimension("n2", 5)
2934
+ ds.createDimension("nchar", 6)
2935
+
2936
+ v = ds.createVariable("strings", "S1", ("n1", "n2", "nchar"))
2937
+
2938
+ # netCDF4 can't resize with incomplete slices and unfitting dimensions
2939
+ if write_module == netCDF4:
2940
+ v[...] = data
2941
+
2942
+ v[:-1] = data[:-1]
2943
+ v[-1] = data[-1]
2944
+ v[-1, -1] = data[-1, -1]
2945
+ ds.close()
2946
+
2947
+
2948
+ @pytest.mark.parametrize("strict", [True, False])
2949
+ @pytest.mark.xfail(reason="Differences between netcdf4/h5netcdf")
2950
+ def test_dump_string_array(tmp_local_netcdf, h5dump, data_model, strict):
2951
+ write_legacy_string_array(tmp_local_netcdf, netCDF4, **data_model)
2952
+ expected = h5dump(tmp_local_netcdf, strict=strict)
2953
+
2954
+ write_legacy_string_array(tmp_local_netcdf, legacyapi, **data_model)
2955
+ actual = h5dump(tmp_local_netcdf, strict=strict)
2956
+
2957
+ assert actual == expected
2958
+
2959
+
2960
+ def maybe_resize_with_broadcasting(tmp_netcdf, write_module, data_model):
2961
+ ds = write_module.Dataset(tmp_netcdf, mode="w", **data_model)
2962
+ n1, n2, n3 = 4, 5, 6
2963
+ data = np.arange(n1 * n2 * n3).reshape((n1, n2, n3))
2964
+
2965
+ ds.createDimension("n1", None)
2966
+ ds.createDimension("n2", n2)
2967
+ ds.createDimension("n3", n3)
2968
+
2969
+ v = ds.createVariable("numbers", "i4", ("n1", "n2", "n3"))
2970
+ # netcdf4-python does not handle this, need to write once with full data
2971
+ if write_module == netCDF4:
2972
+ v[:] = data
2973
+ v[:-1] = data[:-1]
2974
+ v[-1] = data[-1]
2975
+ v[-1, -1] = data[-1, -1]
2976
+ ds.close()
2977
+
2978
+
2979
+ @pytest.mark.parametrize("dataset", [None, "numbers"])
2980
+ @pytest.mark.parametrize("strict", [True, False])
2981
+ @pytest.mark.xfail(reason="Differences between netcdf4/h5netcdf")
2982
+ def test_dump_maybe_resize_with_broadcasting(
2983
+ tmp_local_netcdf, data_model, h5dump, dataset, strict
2984
+ ):
2985
+ maybe_resize_with_broadcasting(tmp_local_netcdf, netCDF4, data_model)
2986
+ expected = h5dump(tmp_local_netcdf, strict=strict, dataset=dataset)
2987
+
2988
+ maybe_resize_with_broadcasting(tmp_local_netcdf, legacyapi, data_model)
2989
+ actual = h5dump(tmp_local_netcdf, strict=strict, dataset=dataset)
2990
+
2991
+ assert actual == expected
2992
+
2993
+
2994
+ def test_is_classic(tmp_local_netcdf):
2995
+ """Check that the generated file is recognized as netCDF-4 classic model by ncdump."""
2996
+ import subprocess
2997
+
2998
+ write_h5netcdf(tmp_local_netcdf, format="NETCDF4_CLASSIC")
2999
+
3000
+ out = subprocess.run(["ncdump", "-k", tmp_local_netcdf], capture_output=True)
3001
+ assert out.stdout.decode().strip() == "netCDF-4 classic model"