PyPI - anndata - Versions diffs - 0.12.0rc4__tar.gz → 0.12.1__tar.gz - Mend

anndata 0.12.0rc4__tar.gz → 0.12.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (207) hide show

{anndata-0.12.0rc4 → anndata-0.12.1}/.github/workflows/test-cpu.yml RENAMED Viewed

@@ -47,6 +47,7 @@ jobs:
     strategy:
       matrix:
         env: ${{ fromJSON(needs.get-environments.outputs.envs) }}
+        io_mark: ["zarr_io", "not zarr_io"]
     env:  # environment variable for use in codecov’s env_vars tagging
       ENV_NAME: ${{ matrix.env.name }}
     steps:
@@ -70,7 +71,7 @@ jobs:
         run: uvx hatch -v env create ${{ matrix.env.name }}
       - name: Run tests
-        run: uvx hatch run ${{ matrix.env.name }}:run-cov -v --color=yes -n auto --cov --cov-report=xml --junitxml=test-data/test-results.xml
+        run: uvx hatch run ${{ matrix.env.name }}:run-cov -v --color=yes -n auto --cov --cov-report=xml --junitxml=test-data/test-results.xml -m "${{matrix.io_mark}}"
       - name: Upload coverage data
         uses: codecov/codecov-action@v5

{anndata-0.12.0rc4 → anndata-0.12.1}/.pre-commit-config.yaml RENAMED Viewed

@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.11.12
+    rev: v0.12.4
     hooks:
       - id: ruff
         args: ["--fix"]
@@ -10,7 +10,7 @@ repos:
         id: ruff
         args: ["--preview", "--select=PLR0917"]
   - repo: https://github.com/biomejs/pre-commit
-    rev: v1.9.4
+    rev: v2.1.2
     hooks:
       - id: biome-format
   - repo: https://github.com/ComPWA/taplo-pre-commit

{anndata-0.12.0rc4 → anndata-0.12.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: anndata
-Version: 0.12.0rc4
+Version: 0.12.1
 Summary: Annotated data.
 Project-URL: Documentation, https://anndata.readthedocs.io/
 Project-URL: Source, https://github.com/scverse/anndata
@@ -28,11 +28,11 @@ Requires-Dist: array-api-compat>=1.7.1
 Requires-Dist: h5py>=3.8
 Requires-Dist: legacy-api-wrap
 Requires-Dist: natsort
-Requires-Dist: numpy>=1.25
+Requires-Dist: numpy>=1.26
 Requires-Dist: packaging>=24.2
-Requires-Dist: pandas!=2.1.0rc0,!=2.1.2,>=2.0.0
-Requires-Dist: scipy>=1.11
-Requires-Dist: zarr!=3.0.0,!=3.0.1,!=3.0.2,!=3.0.3,!=3.0.4,!=3.0.5,!=3.0.6,!=3.0.7,<3.1,>=2.18.7
+Requires-Dist: pandas!=2.1.2,>=2.1.0
+Requires-Dist: scipy>=1.12
+Requires-Dist: zarr!=3.0.*,>=2.18.7
 Provides-Extra: cu11
 Requires-Dist: cupy-cuda11x; extra == 'cu11'
 Provides-Extra: cu12
@@ -74,13 +74,12 @@ Requires-Dist: boltons; extra == 'test'
 Requires-Dist: dask[array]!=2024.8.*,!=2024.9.*,<2025.2.0,>=2023.5.1; extra == 'test'
 Requires-Dist: dask[distributed]; extra == 'test'
 Requires-Dist: filelock; extra == 'test'
-Requires-Dist: httpx; extra == 'test'
+Requires-Dist: httpx<1.0; extra == 'test'
 Requires-Dist: joblib; extra == 'test'
 Requires-Dist: loompy>=3.0.5; extra == 'test'
 Requires-Dist: matplotlib; extra == 'test'
 Requires-Dist: openpyxl; extra == 'test'
-Requires-Dist: pandas>=2.1.0; extra == 'test'
-Requires-Dist: pyarrow; extra == 'test'
+Requires-Dist: pyarrow<21; extra == 'test'
 Requires-Dist: pytest-cov; extra == 'test'
 Requires-Dist: pytest-memray; extra == 'test'
 Requires-Dist: pytest-mock; extra == 'test'
@@ -97,12 +96,12 @@ Requires-Dist: boltons; extra == 'test-min'
 Requires-Dist: dask[array]!=2024.8.*,!=2024.9.*,<2025.2.0,>=2023.5.1; extra == 'test-min'
 Requires-Dist: dask[distributed]; extra == 'test-min'
 Requires-Dist: filelock; extra == 'test-min'
-Requires-Dist: httpx; extra == 'test-min'
+Requires-Dist: httpx<1.0; extra == 'test-min'
 Requires-Dist: joblib; extra == 'test-min'
 Requires-Dist: loompy>=3.0.5; extra == 'test-min'
 Requires-Dist: matplotlib; extra == 'test-min'
 Requires-Dist: openpyxl; extra == 'test-min'
-Requires-Dist: pyarrow; extra == 'test-min'
+Requires-Dist: pyarrow<21; extra == 'test-min'
 Requires-Dist: pytest-cov; extra == 'test-min'
 Requires-Dist: pytest-memray; extra == 'test-min'
 Requires-Dist: pytest-mock; extra == 'test-min'

{anndata-0.12.0rc4 → anndata-0.12.1}/benchmarks/asv.conf.json RENAMED Viewed

@@ -70,6 +70,8 @@
         "pytoml": [""],
         "pytest": [""],
         "pooch": [""],
+        "xarray": [""],
+        "dask": [""],
         // "scanpy": [""],
         // "psutil": [""]
     },

anndata-0.12.1/benchmarks/benchmarks/dataset2d.py ADDED Viewed

@@ -0,0 +1,61 @@
+from __future__ import annotations
+import tempfile
+from pathlib import Path
+from typing import TYPE_CHECKING
+import h5py
+import numpy as np
+import pandas as pd
+import zarr
+import anndata as ad
+if TYPE_CHECKING:
+    from collections.abc import Callable
+class Dataset2D:
+    param_names = ("gen_store", "chunks")
+    params = (
+        (
+            lambda: h5py.File(Path(tempfile.mkdtemp()) / "data.h5ad", mode="w"),
+            lambda: zarr.open(
+                Path(tempfile.mkdtemp()) / "data.zarr", mode="w", zarr_version=2
+            ),
+        ),
+        ((-1,), None),
+    )
+    def setup(
+        self, gen_store: Callable[[], zarr.Group | h5py.File], chunks: None | tuple[int]
+    ):
+        self.n_obs = 100000
+        df = pd.DataFrame(
+            {
+                "a": pd.Categorical(np.array(["a"] * self.n_obs)),
+                "b": np.arange(self.n_obs),
+            },
+            index=[f"cell{i}" for i in range(self.n_obs)],
+        )
+        store = gen_store()
+        ad.io.write_elem(store, "obs", df)
+        self.ds = ad.experimental.read_elem_lazy(store["obs"], chunks=chunks)
+    def time_getitem_slice(self, *_):
+        self.ds.iloc[0 : (self.n_obs // 2)].to_memory()
+    def peakmem_getitem_slice(self, *_):
+        self.ds.iloc[0 : (self.n_obs // 2)].to_memory()
+    def time_full_to_memory(self, *_):
+        self.ds.to_memory()
+    def peakmem_full_to_memory(self, *_):
+        self.ds.to_memory()
+    def time_getitem_bool_mask(self, *_):
+        self.ds.iloc[np.random.randint(0, self.n_obs, self.n_obs // 2)].to_memory()
+    def peakmem_getitem_bool_mask(self, *_):
+        self.ds.iloc[np.random.randint(0, self.n_obs, self.n_obs // 2)].to_memory()

{anndata-0.12.0rc4 → anndata-0.12.1}/benchmarks/benchmarks/sparse_dataset.py RENAMED Viewed

@@ -4,11 +4,13 @@ from types import MappingProxyType
 import numpy as np
 import zarr
+from dask.array.core import Array as DaskArray
 from scipy import sparse
 from anndata import AnnData
 from anndata._core.sparse_dataset import sparse_dataset
 from anndata._io.specs import write_elem
+from anndata.experimental import read_elem_lazy
 def make_alternating_mask(n):
@@ -37,27 +39,36 @@ class SparseCSRContiguousSlice:
             # (10_000, 500)
         ],
         _slices.keys(),
+        [True, False],
     )
-    param_names = ("shape", "slice")
+    param_names = ("shape", "slice", "use_dask")
-    def setup(self, shape: tuple[int, int], slice: str):
+    def setup(self, shape: tuple[int, int], slice: str, use_dask: bool):  # noqa: FBT001
         X = sparse.random(
             *shape, density=0.01, format="csr", random_state=np.random.default_rng(42)
         )
         self.slice = self._slices[slice]
         g = zarr.group()
         write_elem(g, "X", X)
-        self.x = sparse_dataset(g["X"])
+        self.x = read_elem_lazy(g["X"]) if use_dask else sparse_dataset(g["X"])
         self.adata = AnnData(self.x)
     def time_getitem(self, *_):
-        self.x[self.slice]
+        res = self.x[self.slice]
+        if isinstance(res, DaskArray):
+            res.compute()
     def peakmem_getitem(self, *_):
-        self.x[self.slice]
+        res = self.x[self.slice]
+        if isinstance(res, DaskArray):
+            res.compute()
     def time_getitem_adata(self, *_):
-        self.adata[self.slice]
+        res = self.adata[self.slice]
+        if isinstance(res, DaskArray):
+            res.compute()
     def peakmem_getitem_adata(self, *_):
-        self.adata[self.slice]
+        res = self.adata[self.slice]
+        if isinstance(res, DaskArray):
+            res.compute()

{anndata-0.12.0rc4 → anndata-0.12.1}/biome.jsonc RENAMED Viewed

@@ -1,9 +1,9 @@
 {
-    "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
+    "$schema": "https://biomejs.dev/schemas/2.1.1/schema.json",
     "formatter": { "useEditorconfig": true },
     "overrides": [
         {
-            "include": ["./.vscode/*.json", "**/*.jsonc", "**/asv.conf.json"],
+            "includes": ["./.vscode/*.json", "**/*.jsonc", "**/asv.conf.json"],
             "json": {
                 "formatter": {
                     "trailingCommas": "all",

{anndata-0.12.0rc4 → anndata-0.12.1}/docs/conf.py RENAMED Viewed

@@ -125,18 +125,19 @@ intersphinx_mapping = dict(
     awkward=("https://awkward-array.org/doc/stable", None),
     cupy=("https://docs.cupy.dev/en/stable", None),
     dask=("https://docs.dask.org/en/stable", None),
+    fsspec=("https://filesystem-spec.readthedocs.io/en/stable/", None),
     h5py=("https://docs.h5py.org/en/latest", None),
     hdf5plugin=("https://hdf5plugin.readthedocs.io/en/latest", None),
+    kvikio=("https://docs.rapids.ai/api/kvikio/stable/", None),
     loompy=("https://linnarssonlab.org/loompy", None),
     numpy=("https://numpy.org/doc/stable", None),
+    obstore=("https://developmentseed.org/obstore/latest/", None),
     pandas=("https://pandas.pydata.org/pandas-docs/stable", None),
     python=("https://docs.python.org/3", None),
     scipy=("https://docs.scipy.org/doc/scipy", None),
     sklearn=("https://scikit-learn.org/stable", None),
-    zarr=("https://zarr.readthedocs.io/en/stable/", None),
     xarray=("https://docs.xarray.dev/en/stable", None),
-    obstore=("https://developmentseed.org/obstore/latest/", None),
-    kvikio=("https://docs.rapids.ai/api/kvikio/stable/", None),
+    zarr=("https://zarr.readthedocs.io/en/stable/", None),
     zarrs=("https://zarrs-python.readthedocs.io/en/stable/", None),
 )

anndata-0.12.1/docs/release-notes/0.12.0.md ADDED Viewed

@@ -0,0 +1,51 @@
+(v0.12.0)=
+### 0.12.0 {small}`2025-07-16`
+- (v0.12.0rc4)=
+  {guilabel}`rc4` 2025-06-18
+- (v0.12.0rc3)=
+  {guilabel}`rc3` 2025-05-20
+- (v0.12.0rc2)=
+  {guilabel}`rc2` 2025-05-15
+- (v0.12.0rc1)=
+  {guilabel}`rc1` 2025-04-09
+#### Breaking changes
+- {guilabel}`rc1` Remove `anndata.read` {user}`ilan-gold` ({pr}`1766`)
+- {guilabel}`rc1` Tighten usage of {class}`scipy.sparse.spmatrix` for describing sparse matrices in types and instance checks to only {class}`scipy.sparse.csr_matrix` and {class}`scipy.sparse.csc_matrix` {user}`ilan-gold` ({pr}`1768`)
+- {guilabel}`rc1` Disallow declaration of {class}`~anndata.AnnData` with non-`cs{r,c}` sparse data-structures {user}`ilan-gold` ({pr}`1829`)
+- {guilabel}`rc1` Upgrade all `DeprecationWarning`s to `FutureWarning`s {user}`ilan-gold` ({pr}`1874`)
+- {guilabel}`rc4` Lower bound `xarray` by `2025.06.01`.  {class}`pandas.arrays.StringArray` was previously used as the in-memory `nullable-string-array` container in `xarray`, but due to {issue}`pydata/xarray#10419` now uses {class}`numpy.ndarray` with an object data type.  {user}`ilan-gold` ({pr}`2008`)
+#### Bug fixes
+- Fix {func}`anndata.experimental.backed.Dataset2D.reindex` internal setting {user}`ilan-gold` ({pr}`2018`)
+- {guilabel}`rc1` Disallow writing of {class}`~anndata.experimental.backed.Dataset2D` objects {user}`ilan-gold` ({pr}`1887`)
+- {guilabel}`rc1` Upgrade old deprecation warning to a `FutureWarning` on `BaseCompressedSparseDataset.__setitem__`, showing our intent to remove the feature in the next release.  {user}`ilan-gold` ({pr}`1928`)
+- {guilabel}`rc1` Don't use {func}`asyncio.run` internally for any operations {user}`ilan-gold` ({pr}`1933`)
+- {guilabel}`rc1` Disallow forward slashes in keys for writing {user}`ilan-gold` ({pr}`1940`)
+- {guilabel}`rc2` Convert 1d {class}`numpy.ndarray` and {class}`cupy.ndarray`s in {attr}`anndata.AnnData.obsm` and {attr}`anndata.AnnData.varm` to 2d {user}`ilan-gold` ({pr}`1962`)
+- {guilabel}`rc3` Update zarr v3 bound to >3.0.8 to prevent corrupted data {issue}`zarr-developers/zarr-python#3061` {user}`ilan-gold` ({pr}`1993`)
+#### Features
+- {guilabel}`rc1` {data}`None` values can now be serialized to `.h5ad` and `.zarr`,
+  preserving e.g. {attr}`~anndata.AnnData.uns` structure through saving and loading {user}`flying-sheep` ({pr}`999`)
+- {guilabel}`rc1` Add {func}`~anndata.experimental.read_elem_lazy` (in place of `read_elem_as_dask`) to handle backed dataframes, sparse arrays, and dense arrays, as well as a {func}`~anndata.experimental.read_lazy` to handle reading in as much of the on-disk data as possible to produce a {class}`~anndata.AnnData` object {user}`ilan-gold` ({pr}`1247`)
+- {guilabel}`rc1` Support {mod}`zarr` version 3 python package {user}`ilan-gold` ({pr}`1726`)
+- {guilabel}`rc1` Adopt the Scientific Python [deprecation schedule](https://scientific-python.org/specs/spec-0000/) {user}`ilan-gold` ({pr}`1768`)
+- {guilabel}`rc1` Allow {mod}`zarr` v3 writing of data {user}`ilan-gold` ({pr}`1892`)
+- {guilabel}`rc1` {func}`anndata.register_anndata_namespace` functionality for adding custom functionality to an {class}`~anndata.AnnData` object {user}`srivarra` ({pr}`1870`)
+- {guilabel}`rc2` Allow xarray Datasets to be used for obs/var/obsm/varm. {user}`ilia-kats` ({pr}`1966`)
+- {guilabel}`rc4` {class}`anndata.experimental.backed.Dataset2D` now takes a compositional approach to wrapping {class}`xarray.Dataset` which may have breaking changes over the past release versions. {user}`ilan-gold` ({pr}`1997`)
+- {guilabel}`rc4` Use {attr}`numpy.dtypes.StringDType` with `na_object` set to {attr}`pandas.NA` for nullable string data with {class}`anndata.experimental.backed.Dataset2D` {user}`ilan-gold` ({pr}`2011`)
+#### Performance
+- {guilabel}`rc2` Load AnnLoader lazily to prevent expensive unnecessary `torch` imports when it's available on the system. {user}`Zethson` & {user}`flying-sheep` ({pr}`1950`)
+- {guilabel}`rc4` Improve {func}`~anndata.experimental.read_elem_lazy` performance for `h5ad` files by not caching `indptr`. {user}`ilan-gold` ({pr}`2005`)
+#### Development
+- {guilabel}`rc4` Temporarily bound {mod}`zarr` to `<3.1` until {pr}`1995` is merged to handle the new data type structure.  {user}`ilan-gold` ({pr}`2013`)

anndata-0.12.1/docs/release-notes/0.12.1.md ADDED Viewed

@@ -0,0 +1,10 @@
+(v0.12.1)=
+### 0.12.1 {small}`2025-07-23`
+### Bug fixes
+- Fix `chunks` argument for {func}`anndata.experimental.read_elem_lazy` so that it uses the on-disk chunking when possible, and allow users to pass this argument through to the reading of {class}`anndata.experimental.backed.Dataset2D` {user}`ilan-gold` ({pr}`2033`)
+### Performance
+- Improve integer indexing performance of `h5` 1d arrays that are opened via {func}`anndata.experimental.read_elem_lazy` {user}`ilan-gold` ({pr}`2035`)

{anndata-0.12.0rc4 → anndata-0.12.1}/docs/tutorials/zarr-v3.md RENAMED Viewed

@@ -4,6 +4,23 @@
 Users should notice a significant performance improvement, especially for cloud data, but also likely for local data as well.
 Here is a quick guide on some of our learnings so far:
+## Consolidated Metadata
+All `zarr` stores are now consolidated by default when written via {func}`anndata.io.write_zarr` or {meth}`anndata.AnnData.write_zarr`.  For more information on this topic, please see {ref}`the zarr docs <zarr:user-guide-consolidated-metadata>`.  Practically, this change means that once a store has been written, it should be treated as immutable **unless you remove the consolidated metadata and/or rewrite after the mutating operation**, i.e., if you wish to use `anndata.io.write_elem` to add a column to `obs`, a `layer` etc. to an existing store.  For example, to mutate an existing store on-disk, you may do:
+```python
+g = zarr.open_group(orig_path, mode="a", use_consolidated=False)
+ad.io.write_elem(
+    g,
+    "obs",
+    obs,
+    dataset_kwargs=dict(chunks=(250,)),
+)
+zarr.consolidate_metadata(g.store)
+```
+In this example, the store was opened unconsolidated (trying to open it as a consolidated store would error out), edited, and then reconsolidated.  Alternatively, one could simply delete the file containing the consolidated metadata at the root, `.zmetadata`, first.
 ## Remote data
 We now provide the {func}`anndata.experimental.read_lazy` feature for reading as much of the {class}`~anndata.AnnData` object as lazily as possible, using `dask` and {mod}`xarray`.

{anndata-0.12.0rc4 → anndata-0.12.1}/pyproject.toml RENAMED Viewed

@@ -36,18 +36,16 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Visualization",
 ]
 dependencies = [
-    # pandas 2.1.0rc0 has pandas/issues/54622
-    "pandas >=2.0.0, !=2.1.0rc0, !=2.1.2",
-    "numpy>=1.25",
+    "pandas >=2.1.0, !=2.1.2",
+    "numpy>=1.26",
     # https://github.com/scverse/anndata/issues/1434
-    "scipy >=1.11",
+    "scipy >=1.12",
     "h5py>=3.8",
     "natsort",
     "packaging>=24.2",
     "array_api_compat>=1.7.1",
     "legacy-api-wrap",
-    # <3.1 on account of https://github.com/scverse/anndata/pull/1995
-    "zarr >=2.18.7, !=3.0.0, !=3.0.1, !=3.0.2, !=3.0.3, !=3.0.4, !=3.0.5, !=3.0.6, !=3.0.7, <3.1",
+    "zarr >=2.18.7, !=3.0.*",
 ]
 dynamic = [ "version" ]
@@ -96,16 +94,14 @@ test-min = [
     "joblib",
     "boltons",
     "scanpy>=1.10",
-    "httpx",                # For data downloading
+    # TODO: Is 1.0dev1 a real pre-release? https://pypi.org/project/httpx/#history
+    "httpx<1.0",         # For data downloading
     "dask[distributed]",
     "awkward>=2.3.2",
-    "pyarrow",
+    "pyarrow<21",        # https://github.com/scikit-hep/awkward/issues/3579
     "anndata[dask]",
 ]
-test = [
-    "anndata[test-min,lazy]",
-    "pandas>=2.1.0",
-] # pandas 2.1.0 needs to be specified for xarray to work with min-deps script
+test = [ "anndata[test-min,lazy]" ]
 gpu = [ "cupy" ]
 cu12 = [ "cupy-cuda12x" ]
 cu11 = [ "cupy-cuda11x" ]
@@ -172,7 +168,7 @@ testpaths = [
 ]
 # For some reason this affects how logging is shown when tests are run
 xfail_strict = true
-markers = [ "gpu: mark test to run on GPU" ]
+markers = [ "gpu: mark test to run on GPU", "zarr_io: mark tests that involve zarr io" ]
 [tool.ruff]
 src = [ "src" ]
@@ -212,6 +208,7 @@ ignore = [
     "E731",    # Do not assign a lambda expression, use a def -> AnnData allows lambda expression assignments,
     "E741",    # allow I, O, l as variable names -> I is the identity matrix, i, j, k, l is reasonable indexing notation
     "TID252",  # We use relative imports from parent modules
+    "PLC0415", # We use a lot of non-top-level imports
     "PLR2004", # “2” is often not too “magic” a number
     "PLW2901", # Shadowing loop variables isn’t a big deal
 ]

{anndata-0.12.0rc4 → anndata-0.12.1}/src/anndata/_core/aligned_df.py RENAMED Viewed

@@ -59,7 +59,7 @@ def _gen_dataframe_mapping(
         df = pd.DataFrame(
             anno,
             index=None if length is None else mk_index(length),
-            columns=None if len(anno) else [],
+            columns=None if anno else [],
         )
     if length is None:

{anndata-0.12.0rc4 → anndata-0.12.1}/src/anndata/_core/anndata.py RENAMED Viewed

@@ -62,7 +62,7 @@ if TYPE_CHECKING:
     from .index import Index
-class AnnData(metaclass=utils.DeprecationMixinMeta):
+class AnnData(metaclass=utils.DeprecationMixinMeta):  # noqa: PLW1641
     """\
     An annotated data matrix.

{anndata-0.12.0rc4 → anndata-0.12.1}/src/anndata/_core/merge.py RENAMED Viewed

@@ -904,12 +904,6 @@ def concat_arrays(  # noqa: PLR0911, PLR0912
             ],
             format="csr",
         )
-        scipy_version = Version(scipy.__version__)
-        # Bug where xstack produces a matrix not an array in 1.11.*
-        if use_sparse_array and (scipy_version.major, scipy_version.minor) == (1, 11):
-            if mat.format == "csc":
-                return sparse.csc_array(mat)
-            return sparse.csr_array(mat)
         return mat
     else:
         return np.concatenate(

{anndata-0.12.0rc4 → anndata-0.12.1}/src/anndata/_core/sparse_dataset.py RENAMED Viewed

@@ -165,7 +165,11 @@ class BackedSparseMatrix(_cs_matrix):
     def _get_contiguous_compressed_slice(
         self, s: slice
     ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
-        new_indptr = self.indptr[s.start : s.stop + 1].copy()
+        new_indptr = self.indptr[s.start : s.stop + 1]
+        # If indptr is cached, we need to make a copy of the subset
+        # so as not to alter the underlying cached data.
+        if isinstance(self.indptr, np.ndarray):
+            new_indptr = new_indptr.copy()
         start = new_indptr[0]
         stop = new_indptr[-1]

{anndata-0.12.0rc4 → anndata-0.12.1}/src/anndata/_core/xarray.py RENAMED Viewed

@@ -245,7 +245,7 @@ class Dataset2D:
         if df.index.name != index_key and index_key is not None:
             df = df.set_index(index_key)
         for col in set(self.columns) - non_nullable_string_cols:
-            df[col] = pd.array(self[col].data, dtype="string")
+            df[col] = df[col].astype(dtype="string")
         df.index.name = None  # matches old AnnData object
         return df
@@ -389,9 +389,12 @@ class Dataset2D:
         }
         el = self.ds.drop_vars(extension_arrays.keys())
         el = el.reindex({index_dim: index}, method=None, fill_value=fill_value)
-        for col in self.ds:
-            el[col] = pd.Series(self.ds[col], index=self.index).reindex(
-                index, fill_value=fill_value
+        for col, data in extension_arrays.items():
+            el[col] = XDataArray.from_series(
+                pd.Series(data.data, index=self.index).reindex(
+                    index.rename(self.index.name) if index is not None else index,
+                    fill_value=fill_value,
+                )
             )
         return Dataset2D(el)

{anndata-0.12.0rc4 → anndata-0.12.1}/src/anndata/_io/h5ad.py RENAMED Viewed

@@ -176,7 +176,7 @@ def read_h5ad_backed(
 def read_h5ad(
     filename: PathLike[str] | str,
-    backed: Literal["r", "r+"] | bool | None = None,
+    backed: Literal["r", "r+"] | bool | None = None,  # noqa: FBT001
     *,
     as_sparse: Sequence[str] = (),
     as_sparse_fmt: type[CSMatrix] = sparse.csr_matrix,

{anndata-0.12.0rc4 → anndata-0.12.1}/src/anndata/_io/read.py RENAMED Viewed

@@ -22,9 +22,11 @@ if TYPE_CHECKING:
     from collections.abc import Generator, Iterable, Iterator, Mapping
+@old_positionals("first_column_names", "dtype")
 def read_csv(
     filename: PathLike[str] | str | Iterator[str],
     delimiter: str | None = ",",
+    *,
     first_column_names: bool | None = None,
     dtype: str = "float32",
 ) -> AnnData:
@@ -331,9 +333,11 @@ def read_mtx(filename: PathLike[str] | str, dtype: str = "float32") -> AnnData:
     return AnnData(X)
+@old_positionals("first_column_names", "dtype")
 def read_text(
     filename: PathLike[str] | str | Iterator[str],
     delimiter: str | None = None,
+    *,
     first_column_names: bool | None = None,
     dtype: str = "float32",
 ) -> AnnData:
@@ -381,7 +385,7 @@ def _iter_lines(file_like: Iterable[str]) -> Generator[str, None, None]:
 def _read_text(  # noqa: PLR0912, PLR0915
     f: Iterator[str],
     delimiter: str | None,
-    first_column_names: bool | None,
+    first_column_names: bool | None,  # noqa: FBT001
     dtype: str,
 ) -> AnnData:
     comments = []

{anndata-0.12.0rc4 → anndata-0.12.1}/src/anndata/_io/specs/lazy_methods.py RENAMED Viewed

@@ -132,7 +132,7 @@ def read_sparse_as_dask(
     path_or_sparse_dataset = (
         Path(filename(elem))
         if isinstance(elem, H5Group)
-        else ad.io.sparse_dataset(elem)
+        else ad.io.sparse_dataset(elem, should_cache_indptr=False)
     )
     elem_name = get_elem_name(elem)
     shape: tuple[int, int] = tuple(elem.attrs["shape"])
@@ -177,21 +177,37 @@ def read_sparse_as_dask(
     return da_mtx
+def resolve_chunks(
+    elem: H5Array | ZarrArray,
+    chunks_arg: tuple[int, ...] | None,
+    shape: tuple[int, ...],
+) -> tuple[int, ...]:
+    shape = tuple(elem.shape)
+    if chunks_arg is not None:
+        # None and -1 on a given axis indicate that one should use the shape
+        # in `dask`'s semantics.
+        return tuple(
+            c if c not in {None, -1} else s
+            for c, s in zip(chunks_arg, shape, strict=True)
+        )
+    elif elem.chunks is None:  # h5 unchunked
+        return tuple(min(_DEFAULT_STRIDE, s) for s in shape)
+    return elem.chunks
 @_LAZY_REGISTRY.register_read(H5Array, IOSpec("string-array", "0.2.0"))
 def read_h5_string_array(
     elem: H5Array,
     *,
     _reader: LazyReader,
-    chunks: tuple[int, int] | None = None,
+    chunks: tuple[int] | None = None,
 ) -> DaskArray:
     import dask.array as da
     from anndata._io.h5ad import read_dataset
-    return da.from_array(
-        read_dataset(elem),
-        chunks=chunks if chunks is not None else (_DEFAULT_STRIDE,) * len(elem.shape),
-    )
+    chunks = resolve_chunks(elem, chunks, tuple(elem.shape))
+    return da.from_array(read_dataset(elem), chunks=chunks)
 @_LAZY_REGISTRY.register_read(H5Array, IOSpec("array", "0.2.0"))
@@ -204,13 +220,7 @@ def read_h5_array(
     elem_name: str = elem.name
     shape = tuple(elem.shape)
     dtype = elem.dtype
-    chunks = (
-        tuple(
-            c if c not in {None, -1} else s for c, s in zip(chunks, shape, strict=True)
-        )
-        if chunks is not None
-        else tuple(min(_DEFAULT_STRIDE, s) for s in shape)
-    )
+    chunks = resolve_chunks(elem, chunks, shape)
     chunk_layout = tuple(
         compute_chunk_layout_for_axis_size(chunks[i], shape[i])
@@ -228,7 +238,6 @@ def read_h5_array(
 def read_zarr_array(
     elem: ZarrArray, *, _reader: LazyReader, chunks: tuple[int, ...] | None = None
 ) -> DaskArray:
-    chunks: tuple[int, ...] = chunks if chunks is not None else elem.chunks
     import dask.array as da
     return da.from_zarr(elem, chunks=chunks)
@@ -284,9 +293,10 @@ def read_dataframe(
     *,
     _reader: LazyReader,
     use_range_index: bool = False,
+    chunks: tuple[int] | None = None,
 ) -> Dataset2D:
     elem_dict = {
-        k: _reader.read_elem(elem[k])
+        k: _reader.read_elem(elem[k], chunks=chunks)
         for k in [*elem.attrs["column-order"], elem.attrs["_index"]]
     }
     # If we use a range index, the coord axis needs to have the special dim name

anndata 0.12.0rc4__tar.gz → 0.12.1__tar.gz

anndata 0.12.0rc4__tar.gz → 0.12.1__tar.gz