lazynwb 1.0.0.dev2__tar.gz → 1.0.0.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. {lazynwb-1.0.0.dev2/src/lazynwb.egg-info → lazynwb-1.0.0.dev3}/PKG-INFO +1 -1
  2. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/pyproject.toml +1 -1
  3. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/lazyframe.py +146 -0
  4. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3/src/lazynwb.egg-info}/PKG-INFO +1 -1
  5. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_lazyframe.py +37 -2
  6. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/LICENSE +0 -0
  7. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/README.md +0 -0
  8. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/setup.cfg +0 -0
  9. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/__init__.py +0 -0
  10. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_cache/__init__.py +0 -0
  11. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_cache/sqlite.py +0 -0
  12. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_catalog/__init__.py +0 -0
  13. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_catalog/_schema.py +0 -0
  14. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_catalog/accessor.py +0 -0
  15. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_catalog/backend.py +0 -0
  16. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_catalog/models.py +0 -0
  17. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_catalog/polars.py +0 -0
  18. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_cli/__init__.py +0 -0
  19. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_cli/_config.py +0 -0
  20. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_cli/_context.py +0 -0
  21. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_cli/_errors.py +0 -0
  22. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_cli/_formatting.py +0 -0
  23. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_cli/_main.py +0 -0
  24. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_cli/_preview.py +0 -0
  25. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_cli/_query.py +0 -0
  26. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_cli/_schema.py +0 -0
  27. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_cli/_sources.py +0 -0
  28. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_cli/_tables.py +0 -0
  29. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_config.py +0 -0
  30. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_hdf5/__init__.py +0 -0
  31. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_hdf5/parser.py +0 -0
  32. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_hdf5/range_reader.py +0 -0
  33. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_hdf5/reader.py +0 -0
  34. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_storage_options.py +0 -0
  35. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_zarr/__init__.py +0 -0
  36. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/_zarr/reader.py +0 -0
  37. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/attrs.py +0 -0
  38. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/base.py +0 -0
  39. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/conversion.py +0 -0
  40. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/dandi.py +0 -0
  41. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/exceptions.py +0 -0
  42. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/file_io.py +0 -0
  43. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/py.typed +0 -0
  44. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/table_metadata.py +0 -0
  45. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/tables.py +0 -0
  46. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/timeseries.py +0 -0
  47. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/types_.py +0 -0
  48. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb/utils.py +0 -0
  49. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb.egg-info/SOURCES.txt +0 -0
  50. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb.egg-info/dependency_links.txt +0 -0
  51. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb.egg-info/entry_points.txt +0 -0
  52. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb.egg-info/requires.txt +0 -0
  53. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/src/lazynwb.egg-info/top_level.txt +0 -0
  54. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_attrs.py +0 -0
  55. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_cache.py +0 -0
  56. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_catalog_backend.py +0 -0
  57. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_catalog_schema.py +0 -0
  58. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_cli.py +0 -0
  59. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_conversion.py +0 -0
  60. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_core.py +0 -0
  61. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_dandi.py +0 -0
  62. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_dandi_metadata.py +0 -0
  63. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_dandi_tables.py +0 -0
  64. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_dandi_timeseries.py +0 -0
  65. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_dandi_unit.py +0 -0
  66. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_file_io.py +0 -0
  67. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_hdf5_backend.py +0 -0
  68. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_range_reader.py +0 -0
  69. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_remote_schema_budget.py +0 -0
  70. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_speed.py +0 -0
  71. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_table_metadata.py +0 -0
  72. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_tables.py +0 -0
  73. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_timeseries.py +0 -0
  74. {lazynwb-1.0.0.dev2 → lazynwb-1.0.0.dev3}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lazynwb
3
- Version: 1.0.0.dev2
3
+ Version: 1.0.0.dev3
4
4
  Summary: An attempt to speed-up access to large NWB (Neurodata Without Borders) files stored in the cloud.
5
5
  Author-email: Ben Hardcastle <ben.hardcastle@alleninstitue.org>
6
6
  License: MIT
@@ -24,7 +24,7 @@ dependencies = [
24
24
  "aiosqlite>=0.20.0",
25
25
  "tomli>=2.0.0; python_version < '3.11'",
26
26
  ]
27
- version = "1.0.0.dev2"
27
+ version = "1.0.0.dev3"
28
28
  classifiers = [
29
29
  "Development Status :: 3 - Alpha", # https://pypi.org/classifiers/
30
30
  "Programming Language :: Python :: 3",
@@ -1,6 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import functools
4
+ import json
3
5
  import logging
6
+ import operator
4
7
  from collections.abc import Iterable, Iterator, Mapping
5
8
 
6
9
  import polars as pl
@@ -14,6 +17,8 @@ import lazynwb.types_
14
17
 
15
18
  logger = logging.getLogger(__name__)
16
19
 
20
+ _POLARS_DYNAMIC_PREDICATE_TOKEN = "dynamic_pred"
21
+
17
22
 
18
23
  def scan_nwb(
19
24
  source: lazynwb.types_.PathLike | Iterable[lazynwb.types_.PathLike],
@@ -133,6 +138,17 @@ def scan_nwb(
133
138
  else:
134
139
  logger.debug(f"Batch size set to {batch_size} rows per batch")
135
140
 
141
+ predicate, dynamic_predicate_count = _remove_polars_dynamic_predicates(
142
+ predicate
143
+ )
144
+ if dynamic_predicate_count:
145
+ logger.debug(
146
+ "Removed %d Polars dynamic predicate conjunct(s) from %r scan "
147
+ "predicate; Python IO plugins do not receive the dynamic TopK state",
148
+ dynamic_predicate_count,
149
+ table_path,
150
+ )
151
+
136
152
  if predicate is not None:
137
153
  # - if we have a predicate, we'll fetch the minimal df, apply predicate, then fetch remaining columns in with_columns
138
154
  initial_columns = predicate.meta.root_names()
@@ -282,6 +298,136 @@ def scan_nwb(
282
298
  )
283
299
 
284
300
 
301
def _remove_polars_dynamic_predicates(
    predicate: pl.Expr | None,
) -> tuple[pl.Expr | None, int]:
    """Drop standalone Polars dynamic-predicate conjuncts from a pushed predicate.

    The predicate is split on its top-level ``&`` operators. Conjuncts that
    are themselves a dynamic-predicate node are discarded; every other
    conjunct is kept and re-joined with ``&``.

    Returns:
        ``(predicate, dropped)`` where ``dropped`` is the number of conjuncts
        removed. The input predicate is returned untouched with a count of 0
        when it is ``None``, contains no dynamic predicate, or contains one
        that is nested inside a conjunct and so cannot be isolated. The
        returned predicate is ``None`` when every conjunct was dropped.
    """
    unchanged = (predicate, 0)
    if predicate is None:
        return unchanged
    if not _predicate_contains_polars_dynamic_predicate(predicate):
        return unchanged

    kept: list[pl.Expr] = []
    dropped = 0
    for part in _split_conjunctive_predicate(predicate):
        if _predicate_contains_polars_dynamic_predicate(part):
            if not _is_standalone_polars_dynamic_predicate(part):
                # The dynamic predicate is buried inside a larger expression
                # (not a plain AND operand): give up rather than rewrite it.
                logger.debug(
                    "Cannot isolate Polars dynamic predicate from pushed predicate %s; "
                    "leaving predicate unchanged",
                    part,
                )
                return unchanged
            dropped += 1
        else:
            kept.append(part)

    if dropped == 0:
        return unchanged
    if not kept:
        return None, dropped

    # Left fold with `&`, preserving the order produced by the split.
    combined = kept[0]
    for part in kept[1:]:
        combined = combined & part
    return combined, dropped
331
+
332
+
333
def _split_conjunctive_predicate(predicate: pl.Expr) -> list[pl.Expr]:
    """Flatten nested top-level ``&`` operators into a list of conjuncts.

    A predicate that is not a binary AND is returned as a single-element
    list. If Polars panics while popping the AND node, the predicate is
    returned unsplit; any other exception propagates.
    """
    if _is_binary_and_predicate(predicate):
        try:
            # NOTE(review): the order in which `meta.pop()` yields the two
            # operands is not established here; it does not change the
            # logical result of re-ANDing the conjuncts.
            operands = predicate.meta.pop()
        except BaseException as exc:
            # Caught broadly so that a Polars panic can be recognised by the
            # helper; everything else is re-raised untouched.
            if _is_polars_panic_exception(exc):
                logger.debug(
                    "Polars panicked while splitting pushed predicate %s; leaving it intact",
                    predicate,
                )
                return [predicate]
            raise

        return [
            conjunct
            for operand in operands
            for conjunct in _split_conjunctive_predicate(operand)
        ]

    return [predicate]
352
+
353
+
354
def _is_binary_and_predicate(predicate: pl.Expr) -> bool:
    """Return True when the predicate's root JSON node is a ``BinaryExpr`` with op ``And``."""
    payload = _predicate_json_payload(predicate)
    node = payload.get("BinaryExpr") if isinstance(payload, dict) else None
    if isinstance(node, dict):
        return node.get("op") == "And"
    return False
360
+
361
+
362
def _is_standalone_polars_dynamic_predicate(predicate: pl.Expr) -> bool:
    """Return True when the predicate's root JSON node is a dynamic-predicate ``Display``.

    The root must be a ``Display`` mapping whose ``fmt_str`` is a string
    starting with the dynamic-predicate token; anything else yields False.
    """
    payload = _predicate_json_payload(predicate)
    display_node = payload.get("Display") if isinstance(payload, dict) else None
    label = display_node.get("fmt_str") if isinstance(display_node, dict) else None
    if not isinstance(label, str):
        return False
    return label.startswith(_POLARS_DYNAMIC_PREDICATE_TOKEN)
373
+
374
+
375
def _predicate_contains_polars_dynamic_predicate(predicate: pl.Expr) -> bool:
    """Return True when a dynamic-predicate node appears anywhere in the predicate.

    The string form of the expression is checked first as a cheap filter.
    Only when the token shows up there is the JSON form inspected. If the
    JSON form is unavailable, the textual match is trusted and True is
    returned.
    """
    if _POLARS_DYNAMIC_PREDICATE_TOKEN in str(predicate):
        payload = _predicate_json_payload(predicate)
        if payload is None:
            # No structured view to confirm against: stay conservative.
            return True
        return _json_payload_contains_polars_dynamic_predicate(payload)
    return False
384
+
385
+
386
def _predicate_json_payload(predicate: pl.Expr) -> object | None:
    """Serialize the predicate to JSON and return the decoded payload.

    Returns ``None`` when Polars panics during serialization, when the
    serialized value is not a ``str``, or when it is not valid JSON. Any
    other exception raised by serialization propagates to the caller.
    """
    try:
        raw = predicate.meta.serialize(format="json")
    except BaseException as exc:
        # Caught broadly so that a Polars panic can be recognised by the
        # helper; everything else is re-raised untouched.
        if _is_polars_panic_exception(exc):
            logger.debug(
                "Polars panicked while serializing pushed predicate %s to JSON",
                predicate,
            )
            return None
        raise

    if isinstance(raw, str):
        try:
            decoded = json.loads(raw)
        except json.JSONDecodeError:
            logger.debug("Could not decode pushed predicate JSON: %s", raw)
            return None
        return decoded
    return None
405
+
406
+
407
def _json_payload_contains_polars_dynamic_predicate(payload: object) -> bool:
    """Return True when any node of a decoded JSON payload is a dynamic predicate.

    A node matches when it is a mapping with a ``Display`` mapping whose
    ``fmt_str`` is a string starting with the dynamic-predicate token. All
    mapping values and list items are searched; scalars never match.

    The walk uses an explicit stack instead of recursion so that deeply
    nested payloads (for example long chains of binary expressions) cannot
    exhaust the interpreter's recursion limit.
    """
    pending: list[object] = [payload]
    while pending:
        node = pending.pop()
        if isinstance(node, dict):
            display_expr = node.get("Display")
            if isinstance(display_expr, dict):
                fmt_str = display_expr.get("fmt_str")
                if isinstance(fmt_str, str) and fmt_str.startswith(
                    _POLARS_DYNAMIC_PREDICATE_TOKEN
                ):
                    return True
            # Visit order is irrelevant: only the existence of a match matters.
            pending.extend(node.values())
        elif isinstance(node, list):
            pending.extend(node)
    return False
425
+
426
+
427
def _is_polars_panic_exception(exc: BaseException) -> bool:
    """Return True when the exception's exact class is named ``PanicException``.

    The match is by class name only, so subclasses with a different name do
    not count.
    NOTE(review): presumably matched by name so the panic type need not be
    imported — confirm.
    """
    exception_class = type(exc)
    return exception_class.__name__ == "PanicException"
429
+
430
+
285
431
  def _get_limited_path_to_row_indices(
286
432
  source: Iterable[lazynwb.types_.PathLike],
287
433
  table_path: str,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lazynwb
3
- Version: 1.0.0.dev2
3
+ Version: 1.0.0.dev3
4
4
  Summary: An attempt to speed-up access to large NWB (Neurodata Without Borders) files stored in the cloud.
5
5
  Author-email: Ben Hardcastle <ben.hardcastle@alleninstitue.org>
6
6
  License: MIT
@@ -4,16 +4,16 @@ import tempfile
4
4
  import uuid
5
5
  from datetime import datetime, timezone
6
6
 
7
+ import h5py
7
8
  import polars as pl
8
9
  import pytest
9
-
10
- import h5py
11
10
  from pynwb import NWBHDF5IO, NWBFile
12
11
  from pynwb.epoch import TimeIntervals
13
12
 
14
13
  import lazynwb
15
14
  import lazynwb.tables
16
15
 
16
+
17
17
  def test_polars_dtype_inference(local_hdf5_path):
18
18
  schema = lazynwb.tables.get_table_schema(
19
19
  local_hdf5_path,
@@ -160,6 +160,41 @@ def test_scan_nwb_predicate_pushdown(local_hdf5_path):
160
160
  ).all(), "Predicate pushdown filter on internal column was not applied correctly"
161
161
  assert len(filtered_internal_df) == len(lf.collect().filter(internal_expr)), "Filtered DataFrame length does not match length when collecting and filtering separately"
162
162
 
163
+
164
def test_scan_nwb_sort_head_ignores_polars_dynamic_topk_predicate(
    local_hdf5_path: pathlib.Path,
    caplog: pytest.LogCaptureFixture,
) -> None:
    """Polars 1.39+ may push optimizer-only dynamic TopK predicates into IO plugins.

    A filter + sort + head(1) query on the lazy scan must give the same rows
    as running the same steps on the fully collected table. When the plan
    mentions a dynamic predicate, the scan must also log that it removed it.
    """
    caplog.set_level(logging.DEBUG, logger="lazynwb.lazyframe")

    index_column = lazynwb.TABLE_INDEX_COLUMN_NAME
    visp_only = pl.col("structure") == "VISp"
    output_columns = (index_column, "spike_times")

    scanned = lazynwb.scan_nwb(
        source=local_hdf5_path,
        table_path="/units",
        disable_progress=True,
    )

    # Lazy path: the optimizer may turn sort + head into a TopK.
    lazy_query = (
        scanned.filter(visp_only)
        .sort(index_column, descending=True)
        .head(1)
        .select(*output_columns)
    )
    plan = lazy_query.explain()
    result = lazy_query.collect()

    # Eager reference: identical steps on the fully materialised table.
    expected = (
        scanned.collect()
        .filter(visp_only)
        .sort(index_column, descending=True)
        .head(1)
        .select(*output_columns)
    )

    assert result.equals(expected)
    if "dynamic_predicate" in plan:
        assert "Removed 1 Polars dynamic predicate conjunct" in caplog.text
196
+
197
+
163
198
  def test_scan_nwb_raises_on_missing(local_hdf5_path):
164
199
  """Test that scan_nwb raises an error when the table is not found and raise_on_missing=True."""
165
200
  # Try with a non-existent table path
File without changes
File without changes
File without changes