cap-anndata 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
cap_anndata/backed_df.py CHANGED
@@ -1,13 +1,10 @@
1
1
  import pandas as pd
2
2
  import numpy as np
3
3
  from typing import List, Any, Union
4
- import logging
5
4
 
6
5
  from pandas._typing import Self
7
6
  from pandas.core.generic import bool_t
8
7
 
9
- logger = logging.getLogger(__name__)
10
-
11
8
 
12
9
  class CapAnnDataDF(pd.DataFrame):
13
10
  """
@@ -19,26 +16,37 @@ class CapAnnDataDF(pd.DataFrame):
19
16
 
20
17
  _metadata = ["column_order"]
21
18
 
19
+ def column_order_array(self) -> np.array:
20
+ order = self.column_order
21
+ if order is not None and isinstance(order, List):
22
+ # Convert it to numpy array of str elements
23
+ return np.array(order, dtype=object)
24
+ else:
25
+ return order
26
+
22
27
  def rename_column(self, old_name: str, new_name: str) -> None:
23
- i = np.where(self.column_order == old_name)[0]
24
- self.column_order[i] = new_name
28
+ i = np.where(self.column_order_array() == old_name)[0]
29
+ tmp_array = self.column_order_array().copy()
30
+ tmp_array[i] = new_name
31
+ self.column_order = tmp_array.copy()
25
32
  self.rename(columns={old_name: new_name}, inplace=True)
26
33
 
27
34
  def remove_column(self, col_name: str) -> None:
28
- i = np.where(self.column_order == col_name)[0]
29
- self.column_order = np.delete(self.column_order, i)
35
+ i = np.where(self.column_order_array() == col_name)[0]
36
+ self.column_order = np.delete(self.column_order_array(), i)
30
37
  self.drop(columns=[col_name], inplace=True)
31
38
 
32
39
  def __setitem__(self, key, value) -> None:
33
- if key not in self.column_order:
34
- self.column_order = np.append(self.column_order, key)
40
+ if key not in self.column_order_array():
41
+ self.column_order = np.append(self.column_order_array(), key)
35
42
  return super().__setitem__(key, value)
36
43
 
37
44
  @classmethod
38
- def from_df(cls, df: pd.DataFrame, column_order: List[str] = None) -> Self:
45
+ def from_df(cls, df: pd.DataFrame, column_order: Union[np.array, List[str], None] = None) -> Self:
39
46
  if column_order is None:
40
47
  column_order = df.columns.to_numpy()
41
-
48
+ elif isinstance(column_order, List):
49
+ column_order = np.array(column_order)
42
50
  new_inst = cls(df)
43
51
  new_inst.column_order = column_order
44
52
  return new_inst
@@ -47,23 +55,27 @@ class CapAnnDataDF(pd.DataFrame):
47
55
  result = super().join(other=other, **kwargs)
48
56
  if isinstance(other, CapAnnDataDF):
49
57
  new_columns = [
50
- col for col in other.column_order if col not in self.column_order
58
+ col for col in other.column_order_array() if col not in self.column_order_array()
51
59
  ]
52
60
  else:
53
- new_columns = [col for col in other.columns if col not in self.column_order]
54
- column_order = np.append(self.column_order, new_columns)
55
- return self.from_df(result, column_order=column_order)
61
+ new_columns = [col for col in other.columns if col not in self.column_order_array()]
62
+ column_order = np.append(self.column_order_array(), new_columns)
63
+ df = self.from_df(result, column_order=column_order)
64
+ return df
56
65
 
57
66
  def merge(self, right, **kwargs) -> Self:
58
67
  result = super().merge(right=right, **kwargs)
59
68
  if isinstance(right, CapAnnDataDF):
60
69
  new_columns = [
61
- col for col in right.column_order if col not in self.column_order
70
+ col for col in right.column_order_array() if col not in self.column_order_array()
62
71
  ]
63
72
  else:
64
- new_columns = [col for col in right.columns if col not in self.column_order]
65
- column_order = np.append(self.column_order, new_columns)
66
- return self.from_df(result, column_order=column_order)
73
+ new_columns = [col for col in right.columns if col not in self.column_order_array()]
74
+ column_order = np.append(self.column_order_array(), new_columns)
75
+ df = self.from_df(result, column_order=column_order)
76
+ return df
67
77
 
68
78
  def copy(self, deep: Union[bool_t, None] = True) -> Self:
69
- return self.from_df(super().copy(deep=deep), column_order=self.column_order)
79
+ column_order = self.column_order_array()
80
+ df = self.from_df(super().copy(deep=deep), column_order=column_order)
81
+ return df
@@ -7,19 +7,33 @@ import scipy.sparse as ss
7
7
  from packaging import version
8
8
 
9
9
  if version.parse(ad.__version__) < version.parse("0.11.0"):
10
- from anndata.experimental import sparse_dataset, read_elem, write_elem
10
+ from anndata.experimental import (
11
+ sparse_dataset,
12
+ read_elem,
13
+ write_elem,
14
+ CSRDataset,
15
+ CSCDataset,
16
+ )
11
17
  else:
12
- from anndata.io import sparse_dataset, read_elem, write_elem
18
+ from anndata.io import (
19
+ sparse_dataset,
20
+ read_elem,
21
+ write_elem,
22
+ )
23
+ from anndata.abc import (
24
+ CSRDataset,
25
+ CSCDataset,
26
+ )
13
27
 
14
28
  from cap_anndata import CapAnnDataDF, CapAnnDataDict
15
29
 
16
30
  logger = logging.getLogger(__name__)
17
31
 
18
32
  X_NOTATION = Union[
19
- h5py.Dataset, ad.experimental.CSRDataset, ad.experimental.CSCDataset, None
33
+ h5py.Dataset, CSRDataset, CSCDataset, None
20
34
  ]
21
35
  ARRAY_MAPPING_NOTATION = CapAnnDataDict[str, X_NOTATION]
22
-
36
+ FIELDS_SUPPORTED_TO_OVERWRITE = ["obs", "var", "raw.var", "uns", "layers", "obsm", "varm", "obsp", "varp"]
23
37
  NotLinkedObject: Final = "__NotLinkedObject"
24
38
 
25
39
 
@@ -57,15 +71,7 @@ class BaseLayerMatrixAndDf:
57
71
  return shape
58
72
 
59
73
  def _lazy_df_load(self, key: str) -> CapAnnDataDF:
60
- df = CapAnnDataDF()
61
- attribute = self._path_to_content + key
62
- column_order = self._read_attr(self._file[attribute], "column-order")
63
- df.column_order = column_order
64
- if df.column_order.dtype != object:
65
- # empty DataFrame will have column_order as float64
66
- # which leads to failure in overwrite method
67
- df.column_order = df.column_order.astype(object)
68
- return df
74
+ return self._read_df(key=key, columns=[])
69
75
 
70
76
  @staticmethod
71
77
  def _read_attr(obj: Union[h5py.Group, h5py.Dataset], attr_name: str) -> any:
@@ -93,8 +99,10 @@ class BaseLayerMatrixAndDf:
93
99
  cols_to_read = [c for c in columns if c in column_order]
94
100
  df = CapAnnDataDF()
95
101
  df.column_order = column_order
102
+
96
103
  index_col = self._read_attr(h5_group, "_index")
97
- df.index = read_elem(h5_group[index_col])
104
+ index = read_elem(h5_group[index_col])
105
+ df.index = index
98
106
 
99
107
  for col in cols_to_read:
100
108
  df[col] = read_elem(h5_group[col])
@@ -366,37 +374,43 @@ class CapAnnData(BaseLayerMatrixAndDf):
366
374
  return list(self.obsm.keys())
367
375
 
368
376
  def obs_keys(self) -> List[str]:
369
- return self.obs.column_order.tolist()
377
+ return self.obs.column_order_array().tolist()
370
378
 
371
379
  def var_keys(self) -> List[str]:
372
- return self.var.column_order.tolist()
380
+ return self.var.column_order_array().tolist()
381
+
382
+ def field_to_entity(self, key):
383
+ if key == "obs":
384
+ return self.obs
385
+ elif key == "var":
386
+ return self.var
387
+ elif key == "raw.var":
388
+ return self.raw.var if self.raw is not None else None
389
+ elif key == "uns":
390
+ return self.uns
391
+ elif key == "layers":
392
+ return self.layers
393
+ elif key == "obsm":
394
+ return self.obsm
395
+ elif key == "varm":
396
+ return self.varm
397
+ elif key == "obsp":
398
+ return self.obsp
399
+ elif key == "varp":
400
+ return self.varp
401
+ else:
402
+ raise KeyError(
403
+ f"The field {key} is not supported! The list of supported fields are equal to {FIELDS_SUPPORTED_TO_OVERWRITE} "
404
+ f"attributes of the CapAnnData class."
405
+ )
373
406
 
374
407
  def overwrite(self, fields: List[str] = None, compression: str = "lzf") -> None:
375
- field_to_entity = {
376
- "obs": self.obs,
377
- "var": self.var,
378
- "raw.var": self.raw.var if self.raw is not None else None,
379
- "uns": self.uns,
380
- "layers": self.layers,
381
- "obsm": self.obsm,
382
- "varm": self.varm,
383
- "obsp": self.obsp,
384
- "varp": self.varp,
385
- }
386
-
387
408
  if fields is None:
388
- fields = list(field_to_entity.keys())
389
- else:
390
- for f in fields:
391
- if f not in field_to_entity.keys():
392
- raise KeyError(
393
- f"The field {f} is not supported! The list of supported fields are equal to supported "
394
- f"attributes of the CapAnnData class: obs, var, raw.var and uns."
395
- )
409
+ fields = FIELDS_SUPPORTED_TO_OVERWRITE
396
410
 
397
411
  for key in ["obs", "var", "raw.var"]:
398
412
  if key in fields:
399
- entity: CapAnnDataDF = field_to_entity[key]
413
+ entity: CapAnnDataDF = self.field_to_entity(key)
400
414
  if entity is None:
401
415
  continue
402
416
 
@@ -407,11 +421,22 @@ class CapAnnData(BaseLayerMatrixAndDf):
407
421
  f"{key}/{col}", entity[col].values, compression=compression
408
422
  )
409
423
 
410
- column_order = entity.column_order
424
+ column_order = entity.column_order_array()
411
425
  if (
412
426
  column_order.size == 0
413
427
  ): # Refs https://github.com/cellannotation/cap-anndata/issues/6
414
428
  column_order = np.array([], dtype=np.float64)
429
+
430
+ # Index update
431
+ index_name = entity.index.name
432
+ if not index_name:
433
+ index_name = "_index"
434
+ self._file[key].attrs["_index"] = index_name
435
+ index_col = self._read_attr(self._file[key], "_index")
436
+ self._write_elem(
437
+ f"{key}/{index_col}", entity.index.to_numpy(), compression=compression
438
+ )
439
+
415
440
  self._file[key].attrs["column-order"] = column_order
416
441
 
417
442
  if "uns" in fields:
@@ -424,7 +449,7 @@ class CapAnnData(BaseLayerMatrixAndDf):
424
449
 
425
450
  for field in ["layers", "obsm", "varm", "obsp", "varp"]:
426
451
  if field in fields:
427
- for key in field_to_entity[field].keys_to_remove:
452
+ for key in self.field_to_entity(field).keys_to_remove:
428
453
  del self._file[f"{field}/{key}"]
429
454
 
430
455
  def create_layer(
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: cap_anndata
3
- Version: 0.3.1
3
+ Version: 0.4.0
4
4
  Summary: Partial read/write of AnnData (h5ad) files for low-memory operations with large datasets.
5
5
  Home-page: https://github.com/cellannotation/cap-anndata
6
6
  Author: R. Mukhin, A. Isaev
@@ -14,12 +14,23 @@ Classifier: Operating System :: OS Independent
14
14
  Requires-Python: >=3.9
15
15
  Description-Content-Type: text/markdown
16
16
  License-File: LICENSE
17
- Requires-Dist: numpy >=1.23.5
18
- Requires-Dist: pandas >=2.2.0
19
- Requires-Dist: anndata >=0.10.0
17
+ Requires-Dist: numpy>=1.23.5
18
+ Requires-Dist: pandas>=2.2.0
19
+ Requires-Dist: anndata>=0.10.0
20
20
  Provides-Extra: dev
21
- Requires-Dist: pytest >=8.0.0 ; extra == 'dev'
22
- Requires-Dist: setuptools ~=69.1.1 ; extra == 'dev'
21
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
22
+ Requires-Dist: setuptools~=69.1.1; extra == "dev"
23
+ Dynamic: author
24
+ Dynamic: author-email
25
+ Dynamic: classifier
26
+ Dynamic: description
27
+ Dynamic: description-content-type
28
+ Dynamic: home-page
29
+ Dynamic: project-url
30
+ Dynamic: provides-extra
31
+ Dynamic: requires-dist
32
+ Dynamic: requires-python
33
+ Dynamic: summary
23
34
 
24
35
  # CAP-AnnData: Partial I/O for AnnData (.h5ad) Files
25
36
 
@@ -0,0 +1,10 @@
1
+ cap_anndata/__init__.py,sha256=WRAQEDsWTvLbJWVUA5FmKCVrD2GN4oRd5I3c8jc9ajo,197
2
+ cap_anndata/backed_df.py,sha256=2OVomvTY51V05sYwEXg-4JYBgd9iJCA2-Lt7nEAL1Ug,3255
3
+ cap_anndata/backed_dict.py,sha256=Hb1SjnKuQ13mBUitQ5sL3kmcQ1j3GgB19r3yXkC0oIo,1019
4
+ cap_anndata/cap_anndata.py,sha256=-Lp6wxPncVcl_TaECnE6uHTfD9j_Ow_rScvpAWKK_fs,21081
5
+ cap_anndata/reader.py,sha256=UpZBCjaS4-K2w_9m6IuYetO9LwmEEJ5KvAw9aAoMRno,1609
6
+ cap_anndata-0.4.0.dist-info/LICENSE,sha256=XXTH6JikkxH7Gqy9VEj4crSizuwxzv04ROzkQ-ZS6o4,1532
7
+ cap_anndata-0.4.0.dist-info/METADATA,sha256=IXvItMAdXH-CunN3fNlyHPNFmxfoF9dOrU58tl17eLQ,2539
8
+ cap_anndata-0.4.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
9
+ cap_anndata-0.4.0.dist-info/top_level.txt,sha256=GKi_Uk4LUhXwWBfFCTIyJvEoJqFREt_4uH4CWgeLsg4,12
10
+ cap_anndata-0.4.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.3.0)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- cap_anndata/__init__.py,sha256=WRAQEDsWTvLbJWVUA5FmKCVrD2GN4oRd5I3c8jc9ajo,197
2
- cap_anndata/backed_df.py,sha256=bMNsArbPjA-TN7eQB4-9Y2l3s8o03-dM4hPnOR9tROc,2622
3
- cap_anndata/backed_dict.py,sha256=Hb1SjnKuQ13mBUitQ5sL3kmcQ1j3GgB19r3yXkC0oIo,1019
4
- cap_anndata/cap_anndata.py,sha256=uQh49Kwu2cE4-ebgOvb78mMGA_afkZcsr71j6f8EX2I,20600
5
- cap_anndata/reader.py,sha256=UpZBCjaS4-K2w_9m6IuYetO9LwmEEJ5KvAw9aAoMRno,1609
6
- cap_anndata-0.3.1.dist-info/LICENSE,sha256=XXTH6JikkxH7Gqy9VEj4crSizuwxzv04ROzkQ-ZS6o4,1532
7
- cap_anndata-0.3.1.dist-info/METADATA,sha256=688YuF45IuOvu1Hqxbt_O1aeYkoMX4tjV0b2hb1WY8I,2304
8
- cap_anndata-0.3.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
9
- cap_anndata-0.3.1.dist-info/top_level.txt,sha256=GKi_Uk4LUhXwWBfFCTIyJvEoJqFREt_4uH4CWgeLsg4,12
10
- cap_anndata-0.3.1.dist-info/RECORD,,