cap-anndata 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cap_anndata/backed_df.py CHANGED
@@ -1,13 +1,10 @@
1
1
  import pandas as pd
2
2
  import numpy as np
3
3
  from typing import List, Any, Union
4
- import logging
5
4
 
6
5
  from pandas._typing import Self
7
6
  from pandas.core.generic import bool_t
8
7
 
9
- logger = logging.getLogger(__name__)
10
-
11
8
 
12
9
  class CapAnnDataDF(pd.DataFrame):
13
10
  """
@@ -19,26 +16,37 @@ class CapAnnDataDF(pd.DataFrame):
19
16
 
20
17
  _metadata = ["column_order"]
21
18
 
19
+ def column_order_array(self) -> np.array:
20
+ order = self.column_order
21
+ if order is not None and isinstance(order, List):
22
+ # Convert it to numpy array of str elements
23
+ return np.array(order, dtype=object)
24
+ else:
25
+ return order
26
+
22
27
  def rename_column(self, old_name: str, new_name: str) -> None:
23
- i = np.where(self.column_order == old_name)[0]
24
- self.column_order[i] = new_name
28
+ i = np.where(self.column_order_array() == old_name)[0]
29
+ tmp_array = self.column_order_array().copy()
30
+ tmp_array[i] = new_name
31
+ self.column_order = tmp_array.copy()
25
32
  self.rename(columns={old_name: new_name}, inplace=True)
26
33
 
27
34
  def remove_column(self, col_name: str) -> None:
28
- i = np.where(self.column_order == col_name)[0]
29
- self.column_order = np.delete(self.column_order, i)
35
+ i = np.where(self.column_order_array() == col_name)[0]
36
+ self.column_order = np.delete(self.column_order_array(), i)
30
37
  self.drop(columns=[col_name], inplace=True)
31
38
 
32
39
  def __setitem__(self, key, value) -> None:
33
- if key not in self.column_order:
34
- self.column_order = np.append(self.column_order, key)
40
+ if key not in self.column_order_array():
41
+ self.column_order = np.append(self.column_order_array(), key)
35
42
  return super().__setitem__(key, value)
36
43
 
37
44
  @classmethod
38
- def from_df(cls, df: pd.DataFrame, column_order: List[str] = None) -> Self:
45
+ def from_df(cls, df: pd.DataFrame, column_order: Union[np.array, List[str], None] = None) -> Self:
39
46
  if column_order is None:
40
47
  column_order = df.columns.to_numpy()
41
-
48
+ elif isinstance(column_order, List):
49
+ column_order = np.array(column_order)
42
50
  new_inst = cls(df)
43
51
  new_inst.column_order = column_order
44
52
  return new_inst
@@ -47,23 +55,27 @@ class CapAnnDataDF(pd.DataFrame):
47
55
  result = super().join(other=other, **kwargs)
48
56
  if isinstance(other, CapAnnDataDF):
49
57
  new_columns = [
50
- col for col in other.column_order if col not in self.column_order
58
+ col for col in other.column_order_array() if col not in self.column_order_array()
51
59
  ]
52
60
  else:
53
- new_columns = [col for col in other.columns if col not in self.column_order]
54
- column_order = np.append(self.column_order, new_columns)
55
- return self.from_df(result, column_order=column_order)
61
+ new_columns = [col for col in other.columns if col not in self.column_order_array()]
62
+ column_order = np.append(self.column_order_array(), new_columns)
63
+ df = self.from_df(result, column_order=column_order)
64
+ return df
56
65
 
57
66
  def merge(self, right, **kwargs) -> Self:
58
67
  result = super().merge(right=right, **kwargs)
59
68
  if isinstance(right, CapAnnDataDF):
60
69
  new_columns = [
61
- col for col in right.column_order if col not in self.column_order
70
+ col for col in right.column_order_array() if col not in self.column_order_array()
62
71
  ]
63
72
  else:
64
- new_columns = [col for col in right.columns if col not in self.column_order]
65
- column_order = np.append(self.column_order, new_columns)
66
- return self.from_df(result, column_order=column_order)
73
+ new_columns = [col for col in right.columns if col not in self.column_order_array()]
74
+ column_order = np.append(self.column_order_array(), new_columns)
75
+ df = self.from_df(result, column_order=column_order)
76
+ return df
67
77
 
68
78
  def copy(self, deep: Union[bool_t, None] = True) -> Self:
69
- return self.from_df(super().copy(deep=deep), column_order=self.column_order)
79
+ column_order = self.column_order_array()
80
+ df = self.from_df(super().copy(deep=deep), column_order=column_order)
81
+ return df
@@ -7,19 +7,33 @@ import scipy.sparse as ss
7
7
  from packaging import version
8
8
 
9
9
  if version.parse(ad.__version__) < version.parse("0.11.0"):
10
- from anndata.experimental import sparse_dataset, read_elem, write_elem
10
+ from anndata.experimental import (
11
+ sparse_dataset,
12
+ read_elem,
13
+ write_elem,
14
+ CSRDataset,
15
+ CSCDataset,
16
+ )
11
17
  else:
12
- from anndata.io import sparse_dataset, read_elem, write_elem
18
+ from anndata.io import (
19
+ sparse_dataset,
20
+ read_elem,
21
+ write_elem,
22
+ )
23
+ from anndata.abc import (
24
+ CSRDataset,
25
+ CSCDataset,
26
+ )
13
27
 
14
28
  from cap_anndata import CapAnnDataDF, CapAnnDataDict
15
29
 
16
30
  logger = logging.getLogger(__name__)
17
31
 
18
32
  X_NOTATION = Union[
19
- h5py.Dataset, ad.experimental.CSRDataset, ad.experimental.CSCDataset, None
33
+ h5py.Dataset, CSRDataset, CSCDataset, None
20
34
  ]
21
35
  ARRAY_MAPPING_NOTATION = CapAnnDataDict[str, X_NOTATION]
22
-
36
+ FIELDS_SUPPORTED_TO_OVERWRITE = ["obs", "var", "raw.var", "uns", "layers", "obsm", "varm", "obsp", "varp"]
23
37
  NotLinkedObject: Final = "__NotLinkedObject"
24
38
 
25
39
 
@@ -57,15 +71,7 @@ class BaseLayerMatrixAndDf:
57
71
  return shape
58
72
 
59
73
  def _lazy_df_load(self, key: str) -> CapAnnDataDF:
60
- df = CapAnnDataDF()
61
- attribute = self._path_to_content + key
62
- column_order = self._read_attr(self._file[attribute], "column-order")
63
- df.column_order = column_order
64
- if df.column_order.dtype != object:
65
- # empty DataFrame will have column_order as float64
66
- # which leads to failure in overwrite method
67
- df.column_order = df.column_order.astype(object)
68
- return df
74
+ return self._read_df(key=key, columns=[])
69
75
 
70
76
  @staticmethod
71
77
  def _read_attr(obj: Union[h5py.Group, h5py.Dataset], attr_name: str) -> any:
@@ -93,8 +99,10 @@ class BaseLayerMatrixAndDf:
93
99
  cols_to_read = [c for c in columns if c in column_order]
94
100
  df = CapAnnDataDF()
95
101
  df.column_order = column_order
102
+
96
103
  index_col = self._read_attr(h5_group, "_index")
97
- df.index = read_elem(h5_group[index_col])
104
+ index = read_elem(h5_group[index_col])
105
+ df.index = index
98
106
 
99
107
  for col in cols_to_read:
100
108
  df[col] = read_elem(h5_group[col])
@@ -366,37 +374,43 @@ class CapAnnData(BaseLayerMatrixAndDf):
366
374
  return list(self.obsm.keys())
367
375
 
368
376
  def obs_keys(self) -> List[str]:
369
- return self.obs.column_order.tolist()
377
+ return self.obs.column_order_array().tolist()
370
378
 
371
379
  def var_keys(self) -> List[str]:
372
- return self.var.column_order.tolist()
380
+ return self.var.column_order_array().tolist()
381
+
382
+ def field_to_entity(self, key):
383
+ if key == "obs":
384
+ return self.obs
385
+ elif key == "var":
386
+ return self.var
387
+ elif key == "raw.var":
388
+ return self.raw.var if self.raw is not None else None
389
+ elif key == "uns":
390
+ return self.uns
391
+ elif key == "layers":
392
+ return self.layers
393
+ elif key == "obsm":
394
+ return self.obsm
395
+ elif key == "varm":
396
+ return self.varm
397
+ elif key == "obsp":
398
+ return self.obsp
399
+ elif key == "varp":
400
+ return self.varp
401
+ else:
402
+ raise KeyError(
403
+ f"The field {key} is not supported! The list of supported fields are equal to {FIELDS_SUPPORTED_TO_OVERWRITE} "
404
+ f"attributes of the CapAnnData class."
405
+ )
373
406
 
374
407
  def overwrite(self, fields: List[str] = None, compression: str = "lzf") -> None:
375
- field_to_entity = {
376
- "obs": self.obs,
377
- "var": self.var,
378
- "raw.var": self.raw.var if self.raw is not None else None,
379
- "uns": self.uns,
380
- "layers": self.layers,
381
- "obsm": self.obsm,
382
- "varm": self.varm,
383
- "obsp": self.obsp,
384
- "varp": self.varp,
385
- }
386
-
387
408
  if fields is None:
388
- fields = list(field_to_entity.keys())
389
- else:
390
- for f in fields:
391
- if f not in field_to_entity.keys():
392
- raise KeyError(
393
- f"The field {f} is not supported! The list of supported fields are equal to supported "
394
- f"attributes of the CapAnnData class: obs, var, raw.var and uns."
395
- )
409
+ fields = FIELDS_SUPPORTED_TO_OVERWRITE
396
410
 
397
411
  for key in ["obs", "var", "raw.var"]:
398
412
  if key in fields:
399
- entity: CapAnnDataDF = field_to_entity[key]
413
+ entity: CapAnnDataDF = self.field_to_entity(key)
400
414
  if entity is None:
401
415
  continue
402
416
 
@@ -407,11 +421,22 @@ class CapAnnData(BaseLayerMatrixAndDf):
407
421
  f"{key}/{col}", entity[col].values, compression=compression
408
422
  )
409
423
 
410
- column_order = entity.column_order
424
+ column_order = entity.column_order_array()
411
425
  if (
412
426
  column_order.size == 0
413
427
  ): # Refs https://github.com/cellannotation/cap-anndata/issues/6
414
428
  column_order = np.array([], dtype=np.float64)
429
+
430
+ # Index update
431
+ index_name = entity.index.name
432
+ if not index_name:
433
+ index_name = "_index"
434
+ self._file[key].attrs["_index"] = index_name
435
+ index_col = self._read_attr(self._file[key], "_index")
436
+ self._write_elem(
437
+ f"{key}/{index_col}", entity.index.to_numpy(), compression=compression
438
+ )
439
+
415
440
  self._file[key].attrs["column-order"] = column_order
416
441
 
417
442
  if "uns" in fields:
@@ -424,7 +449,7 @@ class CapAnnData(BaseLayerMatrixAndDf):
424
449
 
425
450
  for field in ["layers", "obsm", "varm", "obsp", "varp"]:
426
451
  if field in fields:
427
- for key in field_to_entity[field].keys_to_remove:
452
+ for key in self.field_to_entity(field).keys_to_remove:
428
453
  del self._file[f"{field}/{key}"]
429
454
 
430
455
  def create_layer(
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: cap_anndata
3
- Version: 0.3.1
3
+ Version: 0.4.0
4
4
  Summary: Partial read/write of AnnData (h5ad) files for low-memory operations with large datasets.
5
5
  Home-page: https://github.com/cellannotation/cap-anndata
6
6
  Author: R. Mukhin, A. Isaev
@@ -14,12 +14,23 @@ Classifier: Operating System :: OS Independent
14
14
  Requires-Python: >=3.9
15
15
  Description-Content-Type: text/markdown
16
16
  License-File: LICENSE
17
- Requires-Dist: numpy >=1.23.5
18
- Requires-Dist: pandas >=2.2.0
19
- Requires-Dist: anndata >=0.10.0
17
+ Requires-Dist: numpy>=1.23.5
18
+ Requires-Dist: pandas>=2.2.0
19
+ Requires-Dist: anndata>=0.10.0
20
20
  Provides-Extra: dev
21
- Requires-Dist: pytest >=8.0.0 ; extra == 'dev'
22
- Requires-Dist: setuptools ~=69.1.1 ; extra == 'dev'
21
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
22
+ Requires-Dist: setuptools~=69.1.1; extra == "dev"
23
+ Dynamic: author
24
+ Dynamic: author-email
25
+ Dynamic: classifier
26
+ Dynamic: description
27
+ Dynamic: description-content-type
28
+ Dynamic: home-page
29
+ Dynamic: project-url
30
+ Dynamic: provides-extra
31
+ Dynamic: requires-dist
32
+ Dynamic: requires-python
33
+ Dynamic: summary
23
34
 
24
35
  # CAP-AnnData: Partial I/O for AnnData (.h5ad) Files
25
36
 
@@ -0,0 +1,10 @@
1
+ cap_anndata/__init__.py,sha256=WRAQEDsWTvLbJWVUA5FmKCVrD2GN4oRd5I3c8jc9ajo,197
2
+ cap_anndata/backed_df.py,sha256=2OVomvTY51V05sYwEXg-4JYBgd9iJCA2-Lt7nEAL1Ug,3255
3
+ cap_anndata/backed_dict.py,sha256=Hb1SjnKuQ13mBUitQ5sL3kmcQ1j3GgB19r3yXkC0oIo,1019
4
+ cap_anndata/cap_anndata.py,sha256=-Lp6wxPncVcl_TaECnE6uHTfD9j_Ow_rScvpAWKK_fs,21081
5
+ cap_anndata/reader.py,sha256=UpZBCjaS4-K2w_9m6IuYetO9LwmEEJ5KvAw9aAoMRno,1609
6
+ cap_anndata-0.4.0.dist-info/LICENSE,sha256=XXTH6JikkxH7Gqy9VEj4crSizuwxzv04ROzkQ-ZS6o4,1532
7
+ cap_anndata-0.4.0.dist-info/METADATA,sha256=IXvItMAdXH-CunN3fNlyHPNFmxfoF9dOrU58tl17eLQ,2539
8
+ cap_anndata-0.4.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
9
+ cap_anndata-0.4.0.dist-info/top_level.txt,sha256=GKi_Uk4LUhXwWBfFCTIyJvEoJqFREt_4uH4CWgeLsg4,12
10
+ cap_anndata-0.4.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.3.0)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- cap_anndata/__init__.py,sha256=WRAQEDsWTvLbJWVUA5FmKCVrD2GN4oRd5I3c8jc9ajo,197
2
- cap_anndata/backed_df.py,sha256=bMNsArbPjA-TN7eQB4-9Y2l3s8o03-dM4hPnOR9tROc,2622
3
- cap_anndata/backed_dict.py,sha256=Hb1SjnKuQ13mBUitQ5sL3kmcQ1j3GgB19r3yXkC0oIo,1019
4
- cap_anndata/cap_anndata.py,sha256=uQh49Kwu2cE4-ebgOvb78mMGA_afkZcsr71j6f8EX2I,20600
5
- cap_anndata/reader.py,sha256=UpZBCjaS4-K2w_9m6IuYetO9LwmEEJ5KvAw9aAoMRno,1609
6
- cap_anndata-0.3.1.dist-info/LICENSE,sha256=XXTH6JikkxH7Gqy9VEj4crSizuwxzv04ROzkQ-ZS6o4,1532
7
- cap_anndata-0.3.1.dist-info/METADATA,sha256=688YuF45IuOvu1Hqxbt_O1aeYkoMX4tjV0b2hb1WY8I,2304
8
- cap_anndata-0.3.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
9
- cap_anndata-0.3.1.dist-info/top_level.txt,sha256=GKi_Uk4LUhXwWBfFCTIyJvEoJqFREt_4uH4CWgeLsg4,12
10
- cap_anndata-0.3.1.dist-info/RECORD,,