SpatialQuery 0.0.1__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. spatialquery-0.0.3/MANIFEST.in +6 -0
  2. spatialquery-0.0.3/PKG-INFO +379 -0
  3. spatialquery-0.0.3/README.md +343 -0
  4. spatialquery-0.0.3/SpatialQuery/__init__.py +26 -0
  5. spatialquery-0.0.3/SpatialQuery/plotting.py +1267 -0
  6. spatialquery-0.0.3/SpatialQuery/scfind4sp/__init__.py +3 -0
  7. spatialquery-0.0.3/SpatialQuery/scfind4sp/core_methods.py +1307 -0
  8. spatialquery-0.0.3/SpatialQuery/scfind4sp/cpp_src/EliasFano.h +180 -0
  9. spatialquery-0.0.3/SpatialQuery/scfind4sp/cpp_src/QueryScore.cpp +179 -0
  10. spatialquery-0.0.3/SpatialQuery/scfind4sp/cpp_src/QueryScore.h +32 -0
  11. spatialquery-0.0.3/SpatialQuery/scfind4sp/cpp_src/Serialization.h +72 -0
  12. spatialquery-0.0.3/SpatialQuery/scfind4sp/cpp_src/__init__.py +1 -0
  13. spatialquery-0.0.3/SpatialQuery/scfind4sp/cpp_src/const.h +7 -0
  14. spatialquery-0.0.3/SpatialQuery/scfind4sp/cpp_src/eliasFano.cpp +1657 -0
  15. spatialquery-0.0.3/SpatialQuery/scfind4sp/cpp_src/fp_growth.cpp +245 -0
  16. spatialquery-0.0.3/SpatialQuery/scfind4sp/cpp_src/fp_growth.h +41 -0
  17. spatialquery-0.0.3/SpatialQuery/scfind4sp/cpp_src/serialization.cpp +381 -0
  18. spatialquery-0.0.3/SpatialQuery/scfind4sp/cpp_src/typedef.h +418 -0
  19. spatialquery-0.0.3/SpatialQuery/scfind4sp/cpp_src/utils.cpp +354 -0
  20. spatialquery-0.0.3/SpatialQuery/scfind4sp/cpp_src/utils.h +72 -0
  21. spatialquery-0.0.3/SpatialQuery/spatial_differential_pattern.py +550 -0
  22. spatialquery-0.0.3/SpatialQuery/spatial_gene_covarying.py +4941 -0
  23. spatialquery-0.0.3/SpatialQuery/spatial_query.py +1691 -0
  24. spatialquery-0.0.3/SpatialQuery/spatial_query_multiple_fov.py +1826 -0
  25. spatialquery-0.0.3/SpatialQuery/spatial_utils.py +858 -0
  26. {spatialquery-0.0.1 → spatialquery-0.0.3}/SpatialQuery/utils.py +29 -24
  27. spatialquery-0.0.3/SpatialQuery.egg-info/PKG-INFO +379 -0
  28. spatialquery-0.0.3/SpatialQuery.egg-info/SOURCES.txt +33 -0
  29. spatialquery-0.0.3/SpatialQuery.egg-info/requires.txt +19 -0
  30. spatialquery-0.0.3/SpatialQuery.egg-info/top_level.txt +2 -0
  31. spatialquery-0.0.3/pyproject.toml +52 -0
  32. spatialquery-0.0.3/setup.py +28 -0
  33. spatialquery-0.0.1/MANIFEST.in +0 -3
  34. spatialquery-0.0.1/PKG-INFO +0 -56
  35. spatialquery-0.0.1/README.md +0 -24
  36. spatialquery-0.0.1/SpatialQuery/__init__.py +0 -3
  37. spatialquery-0.0.1/SpatialQuery/spatial_query.py +0 -1339
  38. spatialquery-0.0.1/SpatialQuery/spatial_query_multiple_fov.py +0 -1116
  39. spatialquery-0.0.1/SpatialQuery.egg-info/PKG-INFO +0 -56
  40. spatialquery-0.0.1/SpatialQuery.egg-info/SOURCES.txt +0 -13
  41. spatialquery-0.0.1/SpatialQuery.egg-info/requires.txt +0 -8
  42. spatialquery-0.0.1/SpatialQuery.egg-info/top_level.txt +0 -1
  43. spatialquery-0.0.1/setup.py +0 -31
  44. {spatialquery-0.0.1 → spatialquery-0.0.3}/LICENSE +0 -0
  45. {spatialquery-0.0.1 → spatialquery-0.0.3}/SpatialQuery.egg-info/dependency_links.txt +0 -0
  46. {spatialquery-0.0.1 → spatialquery-0.0.3}/setup.cfg +0 -0
@@ -0,0 +1,6 @@
1
+ include README.md
2
+ include LICENSE
3
+ include setup.py
4
+ include pyproject.toml
5
+
6
+ recursive-include SpatialQuery/scfind4sp/cpp_src *.cpp *.h
@@ -0,0 +1,379 @@
1
+ Metadata-Version: 2.4
2
+ Name: SpatialQuery
3
+ Version: 0.0.3
4
+ Summary: Spatial query tools for analyzing spatial transcriptomics data
5
+ Author-email: Shaokun An <shan12@bwh.harvard.edu>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/ShaokunAn/Spatial-Query
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Python: >=3.10
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: numpy>=1.23.0
18
+ Requires-Dist: anndata>=0.8.0
19
+ Requires-Dist: pandas>=2.0.3
20
+ Requires-Dist: scipy>=1.10.0
21
+ Requires-Dist: matplotlib>=3.7.5
22
+ Requires-Dist: scikit-learn>=1.3.2
23
+ Requires-Dist: scanpy>=1.9.5
24
+ Requires-Dist: statsmodels>=0.14.0
25
+ Requires-Dist: seaborn>=0.13.2
26
+ Requires-Dist: mlxtend>=0.23.1
27
+ Requires-Dist: tqdm>=4.60.0
28
+ Requires-Dist: zarr>=2.14.0
29
+ Provides-Extra: docs
30
+ Requires-Dist: sphinx>=5.3; extra == "docs"
31
+ Requires-Dist: sphinx-rtd-theme; extra == "docs"
32
+ Requires-Dist: sphinx-autodoc-typehints; extra == "docs"
33
+ Requires-Dist: sphinx-copybutton; extra == "docs"
34
+ Requires-Dist: myst-parser; extra == "docs"
35
+ Dynamic: license-file
36
+
37
+ # Spatial-Query
38
+
39
+ A Python package for fast spatial query and analysis of Spatial Transcriptomics (ST) data. Spatial-Query provides efficient methods to identify frequent patterns, perform motif enrichment analysis, and conduct differential expression analysis in spatial transcriptomics datasets.
40
+
41
+ ## Features
42
+
43
+ - **Single FOV Analysis**: Analyze spatial patterns within individual fields of view
44
+ - **Multi-FOV Analysis**: Compare patterns across multiple fields of view or datasets
45
+ - **Fast Spatial Queries**: Built on k-D tree for efficient spatial neighborhood queries
46
+ - **Pattern Mining**: Identify frequent cell type patterns using FP-Growth algorithm
47
+ - **Motif Enrichment**: Statistical analysis of spatial motif enrichment
48
+ - **Differential Expression**: Gene expression analysis with Fisher's exact test
49
+ - **Visualization**: Comprehensive plotting functions for spatial data
50
+
51
+ ## Installation
52
+
53
+ ### From GitHub Repository
54
+
55
+ ```bash
56
+ # Clone the repository
57
+ git clone https://github.com/ShaokunAn/Spatial-Query.git
58
+ cd Spatial-Query
59
+
60
+ # Install from the local clone (use `pip install -e .` for an editable/development install)
61
+ pip install .
62
+
63
+ # Or install directly from GitHub
64
+ pip install git+https://github.com/ShaokunAn/Spatial-Query.git@main
65
+ ```
66
+
67
+ ### Dependencies
68
+
69
+ The package requires the following dependencies:
70
+ - Python >= 3.10
71
+ - numpy, pandas, scipy
72
+ - matplotlib, seaborn
73
+ - scikit-learn
74
+ - scanpy, anndata
75
+ - mlxtend
76
+ - statsmodels
77
+ - pybind11 (for C++ extensions)
78
+
79
+ ## Quick Start
80
+
81
+ ### Single FOV Analysis
82
+
83
+ ```python
84
+ import scanpy as sc
85
+ from SpatialQuery import spatial_query
86
+
87
+ # Load your spatial transcriptomics data
88
+ adata = sc.read_h5ad("your_data.h5ad")
89
+
90
+ # Initialize spatial query object
91
+ sq = spatial_query(
92
+ adata=adata,
93
+ dataset="ST_sample",
94
+ spatial_key="X_spatial", # spatial coordinates in adata.obsm
95
+ label_key="predicted_label", # cell type labels in adata.obs
96
+ build_gene_index=False, # whether to build the scfind gene expression index; if False, adata.X is used directly for DE gene analysis
97
+ feature_name="gene_ids", # gene names in adata.var
98
+ if_lognorm=True # if True, log-normalize adata.X when initializing the spatial_query object
99
+
100
+ )
101
+
102
+ # Find frequent patterns around a specific cell type
103
+ fp_results = sq.find_fp_knn(
104
+ ct="T_cell", # anchor cell type for neighborhood analysis
105
+ k=30, # number of neighbors
106
+ min_support=0.5 # minimum frequency support threshold
107
+ )
108
+
109
+ # Perform motif enrichment analysis
110
+ enrichment_results = sq.motif_enrichment_knn(
111
+ ct="T_cell", # center cell type as anchors
112
+ motifs=["T_cell", "B_cell"], # motif to test. If None, frequent patterns will be searched first for enrichment analysis
113
+ k=30, # number of neighbors
114
+ min_support=0.5, # minimum frequency support threshold
115
+ max_dist=200, # maximum distance for neighbors
116
+ return_cellID=False # whether to return cell IDs for each motif and center cells
117
+ )
118
+
119
+ # Differential expression analysis
120
+ de_results = sq.de_genes(
121
+ ind_group1=[0, 1, 2, 3], # indices of group 1 cells
122
+ ind_group2=[4, 5, 6, 7], # indices of group 2 cells
123
+ method="fisher" # Fisher's exact test
124
+ )
125
+
126
+ # Visualize results
127
+ sq.plot_fov(fig_size=(10, 8)) # Plot spatial data with cell types
128
+ sq.plot_motif_grid(motif=["T_cell", "B_cell"], max_dist=50) # Plot motif around grid points
129
+ sq.plot_motif_celltype(
130
+ ct="T_cell", # center cell type
131
+ motif=["T_cell", "B_cell"], # motif to visualize
132
+ max_dist=100, # radius for neighborhood
133
+ fig_size=(10, 5),
134
+ save_path=None # path to save figure, None for display only
135
+ )
136
+ ```
137
+
138
+ ### Multi-FOV Analysis
139
+
140
+ ```python
141
+ from SpatialQuery import spatial_query_multi
142
+
143
+ # Prepare multiple datasets
144
+ adatas = [adata1, adata2, adata3] # List of AnnData objects
145
+ datasets = ["healthy", "healthy", "disease"] # Dataset names.
146
+
147
+ # Initialize multi-FOV spatial query
148
+ sq_multi = spatial_query_multi(
149
+ adatas=adatas,
150
+ datasets=datasets,
151
+ spatial_key="X_spatial",
152
+ label_key="predicted_label",
153
+ build_gene_index=True,
154
+ feature_name="gene_ids"
155
+ )
156
+
157
+ # Find frequent patterns across datasets
158
+ fp_multi = sq_multi.find_fp_knn(
159
+ ct="T_cell",
160
+ dataset=["healthy"], # specific datasets
161
+ k=30,
162
+ min_support=0.5
163
+ )
164
+
165
+ # Motif enrichment analysis across datasets
166
+ motif_results = sq_multi.motif_enrichment_knn(
167
+ ct="T_cell", # center cell type
168
+ motifs=["T_cell", "B_cell"], # motifs to test
169
+ dataset=["healthy", "disease"], # datasets to compare
170
+ k=30,
171
+ min_support=0.5,
172
+ max_dist=200
173
+ )
174
+
175
+ # Differential pattern analysis across datasets
176
+ diff_results = sq_multi.differential_analysis_knn(
177
+ ct="T_cell", # center cell type
178
+ datasets=["healthy", "disease"], # exactly 2 datasets for comparison
179
+ k=30, # number of neighbors
180
+ min_support=0.5, # minimum support threshold
181
+ max_dist=200 # maximum distance for neighbors
182
+ )
183
+
184
+ # Differential gene expression analysis across specified groups using per-dataset indices
185
+ from collections import defaultdict
186
+
187
+ # Example: keys are modified dataset names (e.g., "healthy_0", "healthy_1"), values are index lists for that dataset
188
+ ind_group1 = defaultdict(list)
189
+ ind_group1["healthy_0"] = [0, 1, 2]
190
+ ind_group1["healthy_1"] = [0, 1]
191
+
192
+ ind_group2 = defaultdict(list)
193
+ ind_group2["disease_0"] = [3, 4]
194
+
195
+
196
+ de_multi = sq_multi.de_genes(
197
+ ind_group1=ind_group1, # group 1: dict keys as dataset names, values as indices in each dataset
198
+ ind_group2=ind_group2, # group 2: same structure
199
+ genes=["Gene_1", "Gene_2"], # Genes of interest; uses all genes if no genes are input
200
+ method="fisher" # method to perform differential gene analysis
201
+ )
202
+
203
+ # Cell type distribution analysis across datasets
204
+ dist_results = sq_multi.cell_type_distribution() # overall distribution
205
+ dist_fov = sq_multi.cell_type_distribution_fov() # per-FOV distribution
206
+
207
+ # Visualize results for each FOV
208
+ for i, sq in enumerate(sq_multi.spatial_queries):
209
+ sq.plot_fov(fig_size=(8, 6))
210
+ sq.plot_motif_celltype(
211
+ ct="T_cell",
212
+ motif=["T_cell", "B_cell"],
213
+ max_dist=50
214
+ )
215
+ ```
216
+
217
+ ## Core Classes and Methods
218
+
219
+ ### `spatial_query` Class (Single FOV)
220
+
221
+ The main class for analyzing spatial patterns within a single field of view.
222
+
223
+ #### Key Methods:
224
+
225
+ - **`find_fp_knn(ct, k, min_support)`**: Find frequent patterns around a cell type using k-nearest neighbors
226
+ - **`find_fp_dist(ct, max_dist, min_support)`**: Find frequent patterns using distance-based neighborhoods
227
+ - **`motif_enrichment_knn(ct, motifs, k, min_support, max_dist)`**: Test motif enrichment using k-NN neighborhoods
228
+ - **`motif_enrichment_dist(ct, motifs, max_dist, min_support)`**: Test motif enrichment using distance-based neighborhoods
229
+ - **`find_patterns_grid(max_dist, min_support)`**: Find patterns using grid-based sampling
230
+ - **`find_patterns_rand(max_dist, n_points, min_support)`**: Find patterns using random sampling
231
+ - **`de_genes(ind_group1, ind_group2, method)`**: Differential expression analysis
232
+ - **`plot_fov(fig_size)`**: Visualize the spatial data
233
+ - **`plot_motif_grid(motif, max_dist)`**: Plot motif distribution around grid points
234
+ - **`plot_motif_rand(motif, max_dist, n_points)`**: Plot motif distribution around random sampled points
235
+ - **`plot_motif_celltype(motif, ct, max_dist)`**: Plot motif around specific cell types
236
+
237
+ #### Parameters:
238
+ - `adata`: AnnData object containing spatial transcriptomics data
239
+ - `dataset`: Dataset name (default: 'ST')
240
+ - `spatial_key`: Key for spatial coordinates in `adata.obsm` (default: 'X_spatial')
241
+ - `label_key`: Key for cell type labels in `adata.obs` (default: 'predicted_label')
242
+ - `build_gene_index`: Whether to build gene expression index with scfind (default: False)
243
+ - `feature_name`: Gene names key in `adata.var` (required if `build_gene_index=True`)
244
+
245
+ ### `spatial_query_multi` Class (Multi-FOV)
246
+
247
+ The main class for analyzing spatial patterns across multiple fields of view or datasets.
248
+
249
+ #### Key Methods:
250
+
251
+ - **`find_fp_knn(ct, dataset, k, min_support)`**: Find frequent patterns across specified datasets
252
+ - **`find_fp_dist(ct, dataset, max_dist, min_support)`**: Find patterns using distance-based neighborhoods
253
+ - **`motif_enrichment_knn(ct, motifs, dataset, k, min_support, max_dist)`**: Test motif enrichment across datasets
254
+ - **`motif_enrichment_dist(ct, motifs, dataset, max_dist, min_support)`**: Distance-based motif enrichment
255
+ - **`differential_analysis_knn(ct, datasets, k, min_support, max_dist)`**: Compare patterns between dataset groups
256
+ - **`differential_analysis_dist(ct, datasets, max_dist, min_support)`**: Distance-based differential pattern analysis
257
+ - **`de_genes(ind_group1, ind_group2, genes, method)`**: Differential expression analysis
258
+ - **`cell_type_distribution()`**: Analyze cell type distribution across datasets
259
+ - **`cell_type_distribution_fov()`**: Cell type distribution per FOV
260
+
261
+ #### Parameters:
262
+ - `adatas`: List of AnnData objects
263
+ - `datasets`: List of dataset names
264
+ - `spatial_key`: Key for spatial coordinates
265
+ - `label_key`: Key for cell type labels
266
+ - `build_gene_index`: Whether to build gene expression indices
267
+
268
+ ## Data Format Requirements
269
+
270
+ ### AnnData Object Structure
271
+
272
+ Your AnnData object should contain:
273
+
274
+ - **`adata.obsm['X_spatial']`**: Spatial coordinates (n_cells × 2)
275
+ - **`adata.obs['predicted_label']`**: Cell type labels
276
+ - **`adata.var['gene_ids']`**: Gene names (if using gene expression analysis)
277
+ - **`adata.X`**: Gene expression matrix (if using gene expression analysis)
278
+
279
+ ### Example Data Preparation
280
+
281
+ ```python
282
+ import scanpy as sc
283
+ import pandas as pd
284
+ import numpy as np
285
+
286
+ # Create example spatial transcriptomics data
287
+ n_cells = 1000
288
+ n_genes = 2000
289
+
290
+ # Spatial coordinates (2D coordinates for each cell)
291
+ spatial_coords = np.random.rand(n_cells, 2) * 100
292
+
293
+ # Cell type labels (annotated cell types)
294
+ cell_types = np.random.choice(['T_cell', 'B_cell', 'Macrophage', 'Neuron'], n_cells)
295
+
296
+ # Gene expression matrix (cells × genes)
297
+ expression_matrix = np.random.negative_binomial(5, 0.3, (n_cells, n_genes))
298
+
299
+ # Create AnnData object
300
+ adata = sc.AnnData(X=expression_matrix)
301
+ adata.obsm['X_spatial'] = spatial_coords # Required: spatial coordinates
302
+ adata.obs['predicted_label'] = cell_types # Required: cell type labels
303
+ adata.var['gene_ids'] = [f'Gene_{i}' for i in range(n_genes)] # Required for gene analysis
304
+
305
+ # Optional: Add gene names as index
306
+ adata.var_names = adata.var['gene_ids']
307
+
308
+ # Optional: Add metadata
309
+ adata.obs['sample_id'] = ['sample_1'] * n_cells
310
+ adata.obs['region'] = np.random.choice(['cortex', 'medulla'], n_cells)
311
+ ```
312
+
313
+ ### Loading Real Data
314
+
315
+ ```python
316
+ # Load from common spatial transcriptomics formats
317
+ import scanpy as sc
318
+
319
+ # Load 10X Visium data
320
+ adata = sc.read_10x_h5("filtered_feature_bc_matrix.h5")
321
+ adata.var_names_make_unique()
322
+
323
+ # Load spatial coordinates (from spaceranger output)
324
+ spatial_coords = pd.read_csv("spatial/tissue_positions_list.csv",
325
+ header=None, index_col=0)
326
+ spatial_coords = spatial_coords.loc[adata.obs_names, [5, 4]].values # x (pxl_col), y (pxl_row) pixel coordinates, aligned to the barcodes in adata
327
+ adata.obsm['X_spatial'] = spatial_coords
328
+
329
+ # Load cell type annotations (from external analysis)
330
+ cell_types = pd.read_csv("cell_type_annotations.csv")
331
+ adata.obs['predicted_label'] = cell_types['cell_type'].values
332
+
333
+ # Initialize spatial query
334
+ sq = spatial_query(adata, build_gene_index=False, feature_name="gene_ids")
335
+ ```
336
+
337
+ ## Advanced Usage
338
+
339
+ ### Custom Spatial Analysis
340
+
341
+ ```python
342
+ # Custom neighborhood analysis
343
+ sq = spatial_query(adata, build_gene_index=True, feature_name="gene_ids")
344
+
345
+ # Find patterns with custom parameters
346
+ fp_results = sq.find_fp_knn(
347
+ ct="T_cell",
348
+ k=50, # larger neighborhood
349
+ min_support=0.3 # lower support threshold
350
+ )
351
+
352
+ # Test specific motifs
353
+ motif_results = sq.motif_enrichment_knn(
354
+ ct="T_cell",
355
+ motifs=["T_cell", "B_cell", "Macrophage"],
356
+ k=30,
357
+ min_support=0.5,
358
+ max_dist=200
359
+ )
360
+ ```
361
+
362
+
363
+ ## Contributing
364
+
365
+ Contributions are welcome! Please feel free to submit a Pull Request.
366
+
367
+ ## License
368
+
369
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
370
+
371
+ ## Contact
372
+
373
+ - **Author**: Shaokun An
374
+ - **Email**: shaokunan1@gmail.com
375
+ - **GitHub**: [@ShaokunAn](https://github.com/ShaokunAn)
376
+
377
+ ## Acknowledgments
378
+
379
+ This package builds upon several excellent open-source libraries including scanpy, scikit-learn, and mlxtend.