knn-normalization 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ from importlib.metadata import version
2
+
3
+ from . import pl, pp, tl
4
+
5
+ __all__ = ["pl", "pp", "tl"]
6
+
7
+ __version__ = version("KNN_normalization")
File without changes
File without changes
@@ -0,0 +1 @@
1
+ from .preprocessing import calculate_neighbors_from_protein, retrieve_neighbors
@@ -0,0 +1,60 @@
1
+ import scanpy as sc
2
+
3
+
4
def retrieve_neighbors(neighbors):
    """
    Turn a KNN graph into a per-cell adjacency dictionary.

    Parameters
    ----------
    neighbors
        KNN graph in the format of ``.obsp["connectivities"]``. Only its
        ``nonzero()`` method is used, which must return a pair of row and
        column index sequences.

    Returns
    -------
    dict
        Dictionary mapping each row (cell) index that has at least one
        non-zero entry to the list of column (neighbor) indices of those
        entries, in the order ``nonzero()`` reports them. Cells without any
        non-zero entry do not appear as keys.
    """
    sources, targets = neighbors.nonzero()

    adjacency = {}
    for source, target in zip(sources, targets):
        if source in adjacency:
            adjacency[source].append(target)
        else:
            adjacency[source] = [target]

    return adjacency
24
+
25
+
26
def calculate_neighbors_from_protein(protein_data, n_neighbors, log_transform=True):
    """
    Build the KNN graph of cells from protein expression data.

    Panels with fewer than 70 proteins are used directly as the feature
    space. Panels with 70 or more proteins are first reduced with PCA and
    the neighbors are searched on the PCA result. Cosine distance is used in
    both cases. The input object is not modified: all work happens on a copy.

    Parameters
    ----------
    protein_data
        AnnData object with raw protein expression counts.
    n_neighbors
        Number of neighbors to compute.
    log_transform
        Whether to apply ``sc.pp.log1p`` to the copy before computing
        neighbors.

    Returns
    -------
    The ``.obsp["connectivities"]`` graph computed by ``sc.pp.neighbors``
    on the working copy.
    """
    working_copy = protein_data.copy()
    if log_transform:
        sc.pp.log1p(working_copy)

    neighbor_options = {"metric": "cosine", "n_neighbors": n_neighbors}
    if working_copy.n_vars < 70:
        # Small panel: search neighbors on the protein matrix itself.
        neighbor_options["use_rep"] = "X"
    else:
        # Large panel (70 proteins or more): search neighbors on the PCA result.
        sc.pp.pca(working_copy)

    sc.pp.neighbors(working_copy, **neighbor_options)
    return working_copy.obsp["connectivities"]
@@ -0,0 +1 @@
1
+ from .tools import _normalize_with_neighbors, knn_normalize
@@ -0,0 +1,505 @@
1
+ from numbers import Integral
2
+ from typing import Literal
3
+ from warnings import warn
4
+
5
+ import mudata
6
+ import numpy as np
7
+ import scanpy as sc
8
+ from anndata import AnnData
9
+ from mudata import MuData
10
+ from scipy import stats
11
+
12
+ from knn_normalization.pp.preprocessing import calculate_neighbors_from_protein, retrieve_neighbors
13
+
14
+ mudata.set_options(pull_on_update=False)
15
+
16
+
17
def knn_normalize(
    data: AnnData | MuData,
    calculate_neighbors_from: Literal["prot", "rna", "use_existing_neighbors"] = "prot",
    preprocess_rna: bool = True,
    log_transform: bool = True,
    n_neighbors: Integral | None = None,
    pseudocount: Integral = 5,
    max_iterations: Integral = 25,
    mean: Literal["average", "geom_mean", "trimmed_mean"] = "average",
    inplace: bool = True,
    save_size_factors: bool = False,
    verbose: bool = True,
    preserve_total_counts: bool = True,
):
    """
    Normalize protein expression with KNN normalization.

    Parameters
    ----------
    data
        AnnData object with protein expression counts or MuData object with
        a ``prot`` modality (and an ``rna`` modality if neighbors are to be
        calculated from RNA).
    calculate_neighbors_from
        Whether to use the ``prot`` or the ``rna`` modality to calculate neighbor
        cells. If ``use_existing_neighbors``, the graph already stored in
        ``.obsp["connectivities"]`` of the protein data is used. ``rna`` is
        only available for MuData input.
    preprocess_rna
        If using RNA to calculate neighbors, whether to preprocess a copy of
        the RNA data with library size normalization and log-transformation
        first.
    log_transform
        Whether to log-transform the protein data.
    n_neighbors
        Number of neighbors. If ``None``, calculated automatically as
        ``max(15, min(round(n_cells / 20), 300))`` where ``n_cells`` is
        ``data.n_obs``. Not used with ``use_existing_neighbors``.
    pseudocount
        Pseudocount to add before normalization to avoid zero-division errors.
    max_iterations
        Maximum number of iterations.
    mean
        Type of mean to use: ``'average'``, ``'geom_mean'``, or ``'trimmed_mean'``.
    inplace
        Whether to update the object in place or return a copy.
    save_size_factors
        If ``True``, saves the final size factors to ``.obs['size_factor']``
        and the size factor history to ``.obsm['size_factor_history']`` of
        the protein data.
    verbose
        Whether to print progress messages.
    preserve_total_counts
        Whether to preserve total counts across iterations.

    Returns
    -------
    If ``inplace`` is ``True``, returns ``None`` and the normalized data is
    written to ``data.X`` (AnnData input) or ``data.mod['prot'].X`` (MuData
    input). If ``inplace`` is ``False``, returns a new AnnData object (AnnData
    input) or a new MuData object (MuData input) holding the normalized
    protein data. The new MuData keeps every modality of the input, in the
    input's order; modalities other than ``prot`` are the same objects as in
    ``data`` (they are not copied).

    Raises
    ------
    AssertionError
        If ``calculate_neighbors_from`` is not a supported value, if ``rna``
        is requested for AnnData input, or if a required modality is missing
        from the MuData object.
    TypeError
        If ``data`` is neither an AnnData nor a MuData object.
    KeyError
        If ``use_existing_neighbors`` is requested but the protein data has
        no ``.obsp["connectivities"]``.

    Notes
    -----
    NOTE(review): when neighbors come from RNA, the graph indices are applied
    to the protein matrix as-is, so ``rna`` and ``prot`` are assumed to hold
    the same cells in the same order. This is not checked here.
    """
    # Validation raises AssertionError explicitly (rather than using ``assert``)
    # so that the checks keep their exception type but survive ``python -O``.
    if calculate_neighbors_from not in ("prot", "rna", "use_existing_neighbors"):
        raise AssertionError("the argument calculate_neighbors_from must be prot, rna or use_existing_neighbors.")

    # Locate the AnnData that holds the protein counts.
    if isinstance(data, AnnData):
        if calculate_neighbors_from == "rna":
            raise AssertionError(
                "If an AnnData object with the protein data is provided, ``calculate_neighbors_from`` cannot be ``rna``. If calculate_neighbors_from = rna is desired, please provide a MuData object with the protein and the rna data."
            )
        protein = data
    elif isinstance(data, MuData):
        if "prot" not in data.mod:
            raise AssertionError(
                "The MuData object does not have a modality called ``prot``, please add a modality called ``prot`` with the protein data."
            )
        protein = data["prot"]
    else:
        # Previously any other input fell through and returned None silently.
        raise TypeError(f"data must be an AnnData or MuData object, got {type(data).__name__}.")

    if ("log1p" in protein.uns) and log_transform:
        warn("The protein data might already be log-transformed.", stacklevel=2)

    if n_neighbors is None:
        n_cells = data.n_obs
        n_neighbors = max(15, min(round(n_cells / 20), 300))

    # Obtain the KNN graph from the requested source.
    if calculate_neighbors_from == "prot":
        neighbors = calculate_neighbors_from_protein(protein, n_neighbors=n_neighbors, log_transform=log_transform)
    elif calculate_neighbors_from == "rna":
        if "rna" not in data.mod:
            raise AssertionError(
                "The MuData object does not have a modality called ``rna``, please add a modality called ``rna`` in order to calculate neighbors from the RNA data."
            )
        # Work on a copy so the RNA modality itself is left untouched.
        rna_for_neighbors = data["rna"].copy()
        if preprocess_rna:
            sc.pp.normalize_total(rna_for_neighbors)
            sc.pp.log1p(rna_for_neighbors)
        sc.pp.pca(rna_for_neighbors)
        sc.pp.neighbors(rna_for_neighbors, n_neighbors=n_neighbors)
        neighbors = rna_for_neighbors.obsp["connectivities"]
    else:  # "use_existing_neighbors"
        if "connectivities" not in protein.obsp:
            raise KeyError(
                "calculate_neighbors_from='use_existing_neighbors' requires a KNN graph in .obsp['connectivities'] of the protein data."
            )
        neighbors = protein.obsp["connectivities"]

    normalized_protein = _normalize_with_neighbors(
        protein,
        neighbors,
        log_transform=log_transform,
        pseudocount=pseudocount,
        max_iterations=max_iterations,
        verbose=verbose,
        save_size_factors=save_size_factors,
        inplace=inplace,
        mean=mean,
        preserve_total_counts=preserve_total_counts,
    )

    if inplace:
        return None
    if isinstance(data, AnnData):
        return normalized_protein

    # Rebuild the MuData from whatever modalities the input actually has,
    # swapping in the normalized protein data. The original code indexed
    # data["rna"] unconditionally, which failed for protein-only MuData
    # objects and dropped any additional modality.
    return MuData(
        {
            name: (normalized_protein if name == "prot" else modality)
            for name, modality in data.mod.items()
        }
    )
185
+
186
+
187
def _normalize_with_neighbors(
    protein_anndata,
    connectivities,
    log_transform=True,
    log_transform_before=False,
    save_size_factors=False,
    pseudocount=5,
    max_iterations=25,
    change_for_stop=0.0005,
    verbose=True,
    inplace: bool = True,
    mean="average",
    preserve_total_counts=True,
):
    """
    Apply KNN normalization given precomputed neighbors.

    In every iteration each cell gets a size factor: for each of its
    neighbors the per-protein ratio ``neighbor / cell`` is summarized by its
    median, and those medians are then averaged over the neighbors with the
    chosen ``mean``. All cells are rescaled by their size factor at the end
    of the iteration. Iteration stops after ``max_iterations`` or once no
    size factor changed by ``change_for_stop`` or more between two
    consecutive iterations.

    Parameters
    ----------
    protein_anndata
        AnnData object with protein expression data.
    connectivities
        KNN graph containing neighbor cells, in the format of
        ``.obsp["connectivities"]``.
    log_transform
        If ``True``, log-transforms the data after normalization.
    log_transform_before
        If ``True``, log-transforms the data before normalization.
        Cannot be ``True`` at the same time as ``log_transform``.
    save_size_factors
        If ``True``, saves the final size factors to
        ``protein_anndata.obs["size_factor"]``, the size factor history
        to ``protein_anndata.obsm["size_factor_history"]`` and the graph
        that was used to
        ``protein_anndata.obsp["connectivities_KNN_normalization"]``.
    pseudocount
        Pseudocount added to avoid zero-division errors.
    max_iterations
        Maximum number of iterations.
    change_for_stop
        Convergence criterion. The algorithm stops when the change in size
        factor between iterations is smaller than this value.
    verbose
        Whether to print progress messages.
    inplace
        Whether to update the AnnData object in place or return a copy.
    mean
        Type of mean to use: ``'average'``, ``'geom_mean'``, or
        ``'trimmed_mean'`` (10% trimmed on each side). Any other value is
        treated like ``'geom_mean'``.
    preserve_total_counts
        Whether to rescale after every iteration so that the sum over the
        whole matrix stays at its value from before the first iteration.

    Returns
    -------
    Normalized data will be written to ``protein_anndata.X`` as a dense
    array. A floating-point input keeps its dtype; any other dtype
    (e.g. integer counts) is promoted to ``float64``. If inplace is False,
    returns a new AnnData object with the normalized data, otherwise ``None``.

    Raises
    ------
    AssertionError
        If ``log_transform`` and ``log_transform_before`` are both ``True``.
    """
    from scipy import sparse  # local import: only needed to detect sparse input

    # Validate before touching any data. Raised explicitly (not via ``assert``)
    # so the check keeps its exception type but is not stripped by ``python -O``.
    if log_transform_before and log_transform:
        raise AssertionError("log_transform and log_transform_before cannot be both True")

    # Converts the format of the KNN graph into a dictionary mapping each cell to its neighbor cells.
    neighbors = retrieve_neighbors(connectivities)

    if not inplace:
        protein_anndata = protein_anndata.copy()

    # Work on a private dense floating-point copy of X:
    #  * sparse matrices do not support adding a non-zero scalar,
    #  * integer matrices cannot be scaled in place by float size factors,
    #  * and a failure midway must not leave the caller's data half-modified.
    raw = protein_anndata.X
    if sparse.issparse(raw):
        raw = raw.toarray()
    raw = np.asarray(raw)
    work_dtype = raw.dtype if np.issubdtype(raw.dtype, np.floating) else np.float64
    x = raw.astype(work_dtype, copy=True)

    x += pseudocount  # To avoid zero-division, we add a pseudocount.
    if log_transform_before:
        x = np.log(x)
    total_sums_before = x.sum()

    num_cells = x.shape[0]
    size_factor_history = []

    # The neighbor lists never change, so convert them to index arrays once
    # instead of once per iteration.
    neighbor_index_arrays = {cell: np.asarray(cell_neighbors) for cell, cell_neighbors in neighbors.items()}

    # KNN normalization.
    for iteration in range(max_iterations):
        # Default of 1 leaves cells without any neighbor in the graph unchanged
        # (a default of 0 would wipe their counts and yield -inf after the log).
        size_factors = np.ones(num_cells)

        for target_cell, neighbor_indices in neighbor_index_arrays.items():
            # Per-protein ratios between every neighbor and the target cell
            # (the target row is broadcast against the neighbor rows).
            ratios = x[neighbor_indices] / x[target_cell]
            # One summary value per neighbor: the median ratio across proteins.
            proto_size_factors = np.median(ratios, axis=1)

            # The cell-specific size factor is the chosen mean of the per-neighbor values.
            if mean == "average":
                size_factor = np.mean(proto_size_factors)
            elif mean == "trimmed_mean":
                size_factor = stats.trim_mean(proto_size_factors, 0.1)
            else:
                size_factor = stats.gmean(proto_size_factors)

            size_factors[target_cell] = size_factor

        # Now, we multiply the protein expression of each cell by its cell-specific factor.
        x *= size_factors[:, None]

        if preserve_total_counts:
            # Undo the global drift so the matrix total matches the starting total,
            # and fold the same correction into the recorded size factors.
            ratio_of_total_counts = x.sum() / total_sums_before
            x /= ratio_of_total_counts
            size_factors = size_factors / ratio_of_total_counts

        # Save this iteration's size factors, mainly to compare with the previous
        # iteration for the stopping criterion.
        size_factor_history.append(size_factors)
        if verbose:
            print("Iteration ", iteration + 1)

        # From the second iteration on, stop once every size factor changed by
        # less than change_for_stop with respect to the previous iteration.
        if iteration > 0:
            biggest_size_factor_change = np.max(np.abs(size_factor_history[-1] - size_factor_history[-2]))
            if verbose:
                print("Change wrt previous iteration:", biggest_size_factor_change)
            if biggest_size_factor_change < change_for_stop:
                break

    if log_transform:
        x = np.log(x)

    if save_size_factors:
        # Explicit 2-D shape (iterations x cells) so that zero iterations still
        # gives one size factor of 1 per cell and an empty per-cell history.
        history = np.array(size_factor_history).reshape(len(size_factor_history), num_cells)
        # Multiplication of the size factors across all iterations.
        protein_anndata.obs["size_factor"] = np.prod(history, axis=0)
        protein_anndata.obsm["size_factor_history"] = history.T
        protein_anndata.obsp["connectivities_KNN_normalization"] = connectivities

    protein_anndata.X = x

    return None if inplace else protein_anndata
321
+
322
+
323
+ # def _normalize_with_neighbors(
324
+ # protein_anndata,
325
+ # connectivities,
326
+ # log_transform=True,
327
+ # log_transform_before=False,
328
+ # save_size_factors=False,
329
+ # pseudocount=5,
330
+ # max_iterations=25,
331
+ # change_for_stop=0.0005,
332
+ # verbose=True,
333
+ # inplace: bool = True,
334
+ # mean="average",
335
+ # ):
336
+ # """
337
+ # Applies KNN normalization given precomputed neighbors.
338
+
339
+ # protein_data: an AnnData object with the protein data in CITE-seq.
340
+ # connectivities: The KNN graph containing neighbor cells. It expects the format from .obsp["connectivities"].
341
+ # log_transform: if True, takes the logarithm of the data.
342
+ # save_size_factors: if True, the final size factors are saved to protein_anndata.obs["size_factor"] and the size factor history (all size factors across iterations) is saved to protein_anndata.obsm["size_factor_history"].
343
+ # pseudocount: adds pseudocounts to the data to avoid ZeroDivision errors. This argument also determines the value of the pseudocount (5 by default).
344
+ # max_iteration: maximum number of iterations.
345
+ # change_for_stop: the algorithm stops when the change in size factor is smaller than this value (convergence criterion).
346
+ # verbose: whether you want to print guidance information when running the function.
347
+ # """
348
+ # neighbors = retrieve_neighbors(
349
+ # connectivities
350
+ # ) # Converts the format of the KNN graph into a dictionary mapping each cell to its neighbor cells.
351
+
352
+ # if not inplace:
353
+ # protein_anndata = protein_anndata.copy()
354
+
355
+ # # TODO: FUNCTIONS IN CASE THE DATA IS SPARSE.
356
+
357
+ # x = protein_anndata.X
358
+ # x += pseudocount # To avoid zero-division, we add a pseudocount.
359
+
360
+ # assert not (log_transform_before and log_transform), "log_transform and log_transform_before cannot be both True"
361
+ # if log_transform_before:
362
+ # x = np.log(x)
363
+
364
+ # num_cells = x.shape[0]
365
+ # size_factor_history = []
366
+
367
+ # # KNN normalization.
368
+ # for iteration in range(max_iterations):
369
+ # size_factors = np.zeros(num_cells)
370
+
371
+ # for target_cell, neighbor_list in neighbors.items():
372
+ # neighbor_indices = np.array(neighbor_list)
373
+ # target_indices = np.full(len(neighbor_list), target_cell)
374
+ # ratios = x[neighbor_indices] / x[target_indices]
375
+ # proto_size_factors = np.median(ratios, axis=1)
376
+
377
+ # # After having collected the ratios for between the neighbor cells and the target cell, we calculate the average of those ratios. That will be the cell-specific size factor.
378
+ # if mean == "average":
379
+ # size_factor = np.mean(proto_size_factors)
380
+ # else:
381
+ # size_factor = stats.gmean(proto_size_factors)
382
+ # size_factors[target_cell] = size_factor
383
+
384
+ # # Now, we multiply the protein expression of each cell by its cell-specific factor.
385
+ # x *= size_factors[:, None]
386
+
387
+ # # Save this iteration's size factors. This is done mainly to compare with the previous iteration for the stopping criterion.
388
+ # size_factor_history.append(size_factors)
389
+ # if verbose:
390
+ # print("Iteration ", iteration + 1)
391
+
392
+ # # Unless it's the first iteration, check the algorithm stopping criterion: if all changes of size_factors are smaller than the "change_for_stop" value with respect to the previous iteration.
393
+
394
+ # if iteration > 0:
395
+ # biggest_size_factor_change = np.max(np.abs(size_factor_history[-1] - size_factor_history[-2]))
396
+ # if verbose:
397
+ # print("Change wrt previous iteration:", biggest_size_factor_change)
398
+ # if biggest_size_factor_change < change_for_stop:
399
+ # break
400
+
401
+ # if log_transform:
402
+ # x = np.log(x)
403
+
404
+ # if save_size_factors:
405
+ # total_size_factors = np.prod(
406
+ # np.array(size_factor_history), axis=0
407
+ # ) # Multiplication of the size factors across all iterations.
408
+ # protein_anndata.obs["size_factor"] = total_size_factors
409
+ # size_factor_history = np.array(size_factor_history).T
410
+ # protein_anndata.obsm["size_factor_history"] = size_factor_history
411
+ # protein_anndata.obsp["connectivities_KNN_normalization"] = connectivities
412
+
413
+ # protein_anndata.X = x
414
+
415
+ # return None if inplace else protein_anndata
416
+
417
+
418
+ # def _normalize_with_neighbors(
419
+ # protein_anndata,
420
+ # neighbors,
421
+ # log_transform=True,
422
+ # log_transform_before=False,
423
+ # save_size_factors=False,
424
+ # pseudocount=5,
425
+ # max_iterations=25,
426
+ # change_for_stop=0.0005,
427
+ # verbose=True,
428
+ # inplace: bool = True,
429
+ # mean="average",
430
+ # ):
431
+ # """
432
+ # Applies KNN normalization given precomputed neighbors.
433
+
434
+ # protein_data: an AnnData object with the protein data in CITE-seq.
435
+ # neighbors: Neighbor cells. These neighbors are retrieved with the "retrieve_neighbors" function. The expected format a is dictionary of lists indicating which cells are neighbors.
436
+ # log_transform: if True, takes the logarithm of the data.
437
+ # save_size_factors: if True, the final size factors are saved to protein_anndata.obs["size_factor"] and the size factor history (all size factors across iterations) is saved to protein_anndata.obsm["size_factor_history"].
438
+ # pseudocount: adds pseudocounts to the data to avoid ZeroDivision errors. This argument also determines the value of the pseudocount (5 by default).
439
+ # max_iteration: maximum number of iterations.
440
+ # change_for_stop: the algorithm stops when the change in size factor is smaller than this value (convergence criterion).
441
+ # verbose: whether you want to print guidance information when running the function.
442
+ # """
443
+ # if not inplace:
444
+ # protein_anndata = protein_anndata.copy()
445
+
446
+ # # TODO: FUNCTIONS IN CASE THE DATA IS SPARSE.
447
+
448
+ # x = protein_anndata.X
449
+ # x += pseudocount # To avoid zero-division, we add a pseudocount.
450
+
451
+ # assert not (log_transform_before and log_transform), "log_transform and log_transform_before cannot be both True"
452
+ # if log_transform_before:
453
+ # x = np.log(x)
454
+
455
+ # num_cells = x.shape[0]
456
+ # size_factor_history = []
457
+
458
+ # # KNN normalization.
459
+ # for iteration in range(max_iterations):
460
+ # size_factors = np.zeros(num_cells)
461
+
462
+ # for target_cell, neighbor_list in neighbors.items():
463
+ # neighbor_indices = np.array(neighbor_list)
464
+ # target_indices = np.full(len(neighbor_list), target_cell)
465
+ # ratios = x[neighbor_indices] / x[target_indices]
466
+ # proto_size_factors = np.median(ratios, axis=1)
467
+
468
+ # # After having collected the ratios for between the neighbor cells and the target cell, we calculate the average of those ratios. That will be the cell-specific size factor.
469
+ # if mean == "average":
470
+ # size_factor = np.mean(proto_size_factors)
471
+ # else:
472
+ # size_factor = stats.gmean(proto_size_factors)
473
+ # size_factors[target_cell] = size_factor
474
+
475
+ # # Now, we multiply the protein expression of each cell by its cell-specific factor.
476
+ # x *= size_factors[:, None]
477
+
478
+ # # Save this iteration's size factors. This is done mainly to compare with the previous iteration for the stopping criterion.
479
+ # size_factor_history.append(size_factors)
480
+ # if verbose:
481
+ # print("Iteration ", iteration + 1)
482
+
483
+ # # Unless it's the first iteration, check the algorithm stopping criterion: if all changes of size_factors are smaller than the "change_for_stop" value with respect to the previous iteration.
484
+
485
+ # if iteration > 0:
486
+ # biggest_size_factor_change = np.max(np.abs(size_factor_history[-1] - size_factor_history[-2]))
487
+ # if verbose:
488
+ # print("Change wrt previous iteration:", biggest_size_factor_change)
489
+ # if biggest_size_factor_change < change_for_stop:
490
+ # break
491
+
492
+ # if log_transform:
493
+ # x = np.log(x)
494
+
495
+ # if save_size_factors:
496
+ # total_size_factors = np.prod(
497
+ # np.array(size_factor_history), axis=0
498
+ # ) # Multiplication of the size factors across all iterations.
499
+ # protein_anndata.obs["size_factor"] = total_size_factors
500
+ # size_factor_history = np.array(size_factor_history).T
501
+ # protein_anndata.obsm["size_factor_history"] = size_factor_history
502
+
503
+ # protein_anndata.X = x
504
+
505
+ # return None if inplace else protein_anndata
@@ -0,0 +1,122 @@
1
+ Metadata-Version: 2.4
2
+ Name: knn-normalization
3
+ Version: 0.1.0
4
+ Summary: Neighbor-based normalization of CITE-seq data
5
+ Project-URL: Documentation, https://knn-normalization.readthedocs.io/
6
+ Project-URL: Homepage, https://github.com/javier-marchena-hurtado/KNN_normalization
7
+ Project-URL: Source, https://github.com/javier-marchena-hurtado/KNN_normalization
8
+ Author: Javier Marchena Hurtado
9
+ Maintainer-email: Javier Marchena Hurtado <javier.marchena.hurtado@gmail.com>
10
+ License: MIT License
11
+
12
+ Copyright (c) 2025, Javier Marchena Hurtado
13
+
14
+ Permission is hereby granted, free of charge, to any person obtaining a copy
15
+ of this software and associated documentation files (the "Software"), to deal
16
+ in the Software without restriction, including without limitation the rights
17
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18
+ copies of the Software, and to permit persons to whom the Software is
19
+ furnished to do so, subject to the following conditions:
20
+
21
+ The above copyright notice and this permission notice shall be included in all
22
+ copies or substantial portions of the Software.
23
+
24
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30
+ SOFTWARE.
31
+ License-File: LICENSE
32
+ Classifier: Programming Language :: Python :: 3 :: Only
33
+ Classifier: Programming Language :: Python :: 3.10
34
+ Classifier: Programming Language :: Python :: 3.11
35
+ Classifier: Programming Language :: Python :: 3.12
36
+ Classifier: Programming Language :: Python :: 3.13
37
+ Classifier: Programming Language :: Python :: 3.14
38
+ Requires-Python: >=3.10
39
+ Requires-Dist: anndata
40
+ Requires-Dist: mudata
41
+ Requires-Dist: numpy
42
+ Requires-Dist: scanpy
43
+ Requires-Dist: scipy
44
+ Requires-Dist: session-info2
45
+ Provides-Extra: dev
46
+ Requires-Dist: build; extra == 'dev'
47
+ Requires-Dist: pre-commit; extra == 'dev'
48
+ Requires-Dist: twine>=4.0.2; extra == 'dev'
49
+ Provides-Extra: doc
50
+ Requires-Dist: docutils!=0.18.*,!=0.19.*,>=0.8; extra == 'doc'
51
+ Requires-Dist: ipykernel; extra == 'doc'
52
+ Requires-Dist: ipython; extra == 'doc'
53
+ Requires-Dist: myst-nb>=1.1; extra == 'doc'
54
+ Requires-Dist: pandas; extra == 'doc'
55
+ Requires-Dist: setuptools; extra == 'doc'
56
+ Requires-Dist: sphinx-autodoc-typehints; extra == 'doc'
57
+ Requires-Dist: sphinx-book-theme>=1; extra == 'doc'
58
+ Requires-Dist: sphinx-copybutton; extra == 'doc'
59
+ Requires-Dist: sphinx-tabs; extra == 'doc'
60
+ Requires-Dist: sphinx>=4; extra == 'doc'
61
+ Requires-Dist: sphinxcontrib-bibtex>=1; extra == 'doc'
62
+ Requires-Dist: sphinxext-opengraph; extra == 'doc'
63
+ Provides-Extra: test
64
+ Requires-Dist: coverage; extra == 'test'
65
+ Requires-Dist: pytest; extra == 'test'
66
+ Description-Content-Type: text/markdown
67
+
68
+ # KNN normalization
69
+
70
+ [![Tests][badge-tests]][tests]
71
+ [![Documentation][badge-docs]][documentation]
72
+
73
+ [badge-tests]: https://img.shields.io/github/actions/workflow/status/javier-marchena-hurtado/KNN_normalization/test.yaml?branch=main
74
+ [badge-docs]: https://img.shields.io/readthedocs/KNN_normalization
75
+
76
+ <img src="https://raw.githubusercontent.com/javier-marchena-hurtado/KNN_normalization/main/images/KNN_normalization_logo.png" width="250">
77
+
78
+ ## Background and motivation
79
+
80
+ KNN normalization is a normalization method for protein counts in CITE-seq data. KNN normalization learns from neighbor cells in a KNN graph in order to estimate the appropriate total protein counts in each cell. KNN normalization accurately estimates total protein counts while preserving biological information.
81
+
82
+ ## Getting started
83
+
84
+ Please refer to the [documentation][],
85
+ in particular, the [API documentation][].
86
+
87
+ ## Installation
88
+
89
+
90
+ <!--
91
+ 1) Install the latest release of `KNN_normalization` from [PyPI][]:
92
+
93
+ ```bash
94
+ pip install KNN_normalization
95
+ ```
96
+ -->
97
+ Install the latest development version:
98
+
99
+ ```bash
100
+ pip install git+https://github.com/javier-marchena-hurtado/KNN_normalization.git@main
101
+ ```
102
+
103
+ ## Release notes
104
+
105
+ See the [changelog][].
106
+
107
+ ## Contact
108
+
109
+ For questions and help requests, please open a [discussion][] on GitHub.
110
+ If you found a bug, please use the [issue tracker][].
111
+
112
+ ## Citation
113
+
114
+ > t.b.a
115
+
116
+ [issue tracker]: https://github.com/javier-marchena-hurtado/KNN_normalization/issues
117
+ [tests]: https://github.com/javier-marchena-hurtado/KNN_normalization/actions/workflows/test.yaml
118
+ [documentation]: https://knn-normalization.readthedocs.io
119
+ [changelog]: https://knn-normalization.readthedocs.io/en/latest/changelog.html
120
+ [api documentation]: https://knn-normalization.readthedocs.io/en/latest/api.html
121
+ [pypi]: https://pypi.org/project/KNN_normalization
122
+ [discussion]: https://github.com/javier-marchena-hurtado/KNN_normalization/discussions
@@ -0,0 +1,11 @@
1
+ knn_normalization/__init__.py,sha256=91NobikqQ6ujn6mjyirBBBKGYosH-X2w4u2dtiTWsEQ,139
2
+ knn_normalization/pl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ knn_normalization/pl/plotting.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ knn_normalization/pp/__init__.py,sha256=8revjs6LifS_m7smQvlIS3EU3BJtG4VJedZ0MXGqJLY,80
5
+ knn_normalization/pp/preprocessing.py,sha256=xBrLokt7Nv5xbGjer_nQkUS-pjRBOuFF1STDKzbBmn8,1945
6
+ knn_normalization/tl/__init__.py,sha256=Sk8ckeXIjHWcuGowidNJduZPEc_UKRwtyeFwAtTzwyg,60
7
+ knn_normalization/tl/tools.py,sha256=tpXZqIiXskTpCaOlmnuvppwewyvmU-Z0ggdGxhwFp2s,20992
8
+ knn_normalization-0.1.0.dist-info/METADATA,sha256=NH25-tkPV-pmTJx16obt3XV-c-anzp2TpnRqHTYO5Eo,5035
9
+ knn_normalization-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
10
+ knn_normalization-0.1.0.dist-info/licenses/LICENSE,sha256=zL2KEEuPAgaYdVcUNXrCmgdBG_abnDl7zmgSJqcBoQw,1081
11
+ knn_normalization-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025, Javier Marchena Hurtado
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.