omicsync 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. omicsync-0.1.0/LICENSE +21 -0
  2. omicsync-0.1.0/PKG-INFO +188 -0
  3. omicsync-0.1.0/README.md +120 -0
  4. omicsync-0.1.0/omicsync/__init__.py +33 -0
  5. omicsync-0.1.0/omicsync/core/__init__.py +25 -0
  6. omicsync-0.1.0/omicsync/core/dataset.py +507 -0
  7. omicsync-0.1.0/omicsync/core/modality.py +398 -0
  8. omicsync-0.1.0/omicsync/core/sample_index.py +200 -0
  9. omicsync-0.1.0/omicsync/integration/__init__.py +11 -0
  10. omicsync-0.1.0/omicsync/integration/concat.py +146 -0
  11. omicsync-0.1.0/omicsync/integration/mofa.py +279 -0
  12. omicsync-0.1.0/omicsync/integration/sklearn_compat.py +178 -0
  13. omicsync-0.1.0/omicsync/loaders/__init__.py +19 -0
  14. omicsync-0.1.0/omicsync/loaders/csv.py +147 -0
  15. omicsync-0.1.0/omicsync/loaders/geo.py +111 -0
  16. omicsync-0.1.0/omicsync/loaders/open_targets.py +239 -0
  17. omicsync-0.1.0/omicsync/loaders/tcga.py +251 -0
  18. omicsync-0.1.0/omicsync/normalisation/__init__.py +5 -0
  19. omicsync-0.1.0/omicsync/normalisation/cnv.py +97 -0
  20. omicsync-0.1.0/omicsync/normalisation/methylation.py +131 -0
  21. omicsync-0.1.0/omicsync/normalisation/mutations.py +123 -0
  22. omicsync-0.1.0/omicsync/normalisation/protein.py +54 -0
  23. omicsync-0.1.0/omicsync/normalisation/rna.py +182 -0
  24. omicsync-0.1.0/omicsync/utils/__init__.py +32 -0
  25. omicsync-0.1.0/omicsync/utils/barcode.py +165 -0
  26. omicsync-0.1.0/omicsync/utils/logging.py +44 -0
  27. omicsync-0.1.0/omicsync/utils/validation.py +152 -0
  28. omicsync-0.1.0/omicsync.egg-info/PKG-INFO +188 -0
  29. omicsync-0.1.0/omicsync.egg-info/SOURCES.txt +36 -0
  30. omicsync-0.1.0/omicsync.egg-info/dependency_links.txt +1 -0
  31. omicsync-0.1.0/omicsync.egg-info/requires.txt +29 -0
  32. omicsync-0.1.0/omicsync.egg-info/top_level.txt +1 -0
  33. omicsync-0.1.0/pyproject.toml +72 -0
  34. omicsync-0.1.0/setup.cfg +4 -0
  35. omicsync-0.1.0/tests/test_dataset.py +164 -0
  36. omicsync-0.1.0/tests/test_integration.py +107 -0
  37. omicsync-0.1.0/tests/test_loaders.py +164 -0
  38. omicsync-0.1.0/tests/test_normalisation.py +192 -0
omicsync-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Paterson V.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,188 @@
1
+ Metadata-Version: 2.4
2
+ Name: omicsync
3
+ Version: 0.1.0
4
+ Summary: Multi-omics data harmonisation for Python
5
+ Author-email: "Paterson V." <citrus.bird72@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Paterson V.
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/vi-c-ky/omicsync
29
+ Project-URL: Documentation, https://github.com/vi-c-ky/omicsync/blob/main/docs/index.md
30
+ Project-URL: Repository, https://github.com/vi-c-ky/omicsync
31
+ Project-URL: Bug Tracker, https://github.com/vi-c-ky/omicsync/issues
32
+ Keywords: bioinformatics,multi-omics,TCGA,genomics,data harmonisation
33
+ Classifier: Development Status :: 3 - Alpha
34
+ Classifier: Intended Audience :: Science/Research
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Programming Language :: Python :: 3
37
+ Classifier: Programming Language :: Python :: 3.9
38
+ Classifier: Programming Language :: Python :: 3.10
39
+ Classifier: Programming Language :: Python :: 3.11
40
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
41
+ Requires-Python: >=3.9
42
+ Description-Content-Type: text/markdown
43
+ License-File: LICENSE
44
+ Requires-Dist: pandas>=1.5.0
45
+ Requires-Dist: numpy>=1.23.0
46
+ Requires-Dist: scipy>=1.9.0
47
+ Requires-Dist: scikit-learn>=1.1.0
48
+ Requires-Dist: requests>=2.28.0
49
+ Provides-Extra: mofa
50
+ Requires-Dist: mofapy2>=0.7.0; extra == "mofa"
51
+ Provides-Extra: geo
52
+ Requires-Dist: GEOparse>=2.0.0; extra == "geo"
53
+ Provides-Extra: torch
54
+ Requires-Dist: torch>=1.12.0; extra == "torch"
55
+ Provides-Extra: anndata
56
+ Requires-Dist: anndata>=0.8.0; extra == "anndata"
57
+ Provides-Extra: dev
58
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
59
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
60
+ Requires-Dist: black>=23.0.0; extra == "dev"
61
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
62
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
63
+ Requires-Dist: build>=1.0.0; extra == "dev"
64
+ Requires-Dist: twine>=4.0.0; extra == "dev"
65
+ Provides-Extra: all
66
+ Requires-Dist: omicsync[anndata,geo,mofa,torch]; extra == "all"
67
+ Dynamic: license-file
68
+
69
+ # omicsync
70
+
71
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
72
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
73
+ [![PyPI version](https://img.shields.io/pypi/v/omicsync.svg)](https://pypi.org/project/omicsync/)
74
+
75
+ **A Python library for multi-omics data harmonisation.**
76
+
77
+ omicsync handles the tedious work of aligning sample IDs, normalising each modality consistently, and exporting to downstream tools so you can focus on biology, not data wrangling.
78
+
79
+ ---
80
+
81
+ ## Installation
82
+
83
+ ```bash
84
+ pip install omicsync
85
+ ```
86
+
87
+ With optional extras:
88
+
89
+ ```bash
90
+ pip install "omicsync[mofa]" # MOFA2 factor analysis
91
+ pip install "omicsync[geo]" # GEO data loading
92
+ pip install "omicsync[anndata]" # AnnData export
93
+ pip install "omicsync[torch]" # PyTorch tensor export
94
+ pip install "omicsync[all]" # Everything
95
+ ```
96
+
97
+ ---
98
+
99
+ ## Quick Start
100
+
101
+ ```python
102
+ import omicsync as oms
103
+ from omicsync.loaders.csv import load_multimodal_csv
104
+
105
+ # Load multiple modalities from CSV files
106
+ dataset = load_multimodal_csv({
107
+ "rna": "brca_rna.tsv",
108
+ "protein": "brca_rppa.tsv",
109
+ "cnv": "brca_cnv.tsv",
110
+ }, study_id="TCGA-BRCA")
111
+
112
+ # Align, normalise, filter — all chainable
113
+ dataset.align_samples().normalize().filter_features(min_variance=0.01)
114
+
115
+ # Export to DataFrame or MOFA2
116
+ df = dataset.to_dataframe() # samples × features, prefixed columns
117
+ mofa_input = dataset.to_mofa2() # dict ready for mofapy2 entry_point
118
+ ```
119
+
120
+ ---
121
+
122
+ ## Features
123
+
124
+ - **Sample harmonisation** — TCGA barcode parsing, fuzzy ID matching, coverage reporting
125
+ - **Per-modality normalisation** — auto-detection of count/TPM/M-value formats
126
+ - **Chainable API** — `dataset.align_samples().normalize().filter_features()`
127
+ - **sklearn compatibility** — use `OmicsSyncTransformer` in a `Pipeline`
128
+ - **Multiple export formats** — DataFrame, dict, MOFA2, PyTorch tensor, AnnData
129
+ - **Open Targets integration** — query target-disease associations via GraphQL
130
+ - **Type hints throughout** — fully typed public API
131
+
132
+ ---
133
+
134
+ ## Supported Data Sources
135
+
136
+ | Source | Loader | Notes |
137
+ |--------|--------|-------|
138
+ | TCGA | `load_tcga_files()` | Local files; barcode auto-harmonisation |
139
+ | GEO | `load_geo()` | Via GEOparse; requires `omicsync[geo]` |
140
+ | CSV/TSV | `load_csv()` | Any tabular file |
141
+ | Open Targets | `load_open_targets_targets()` | GraphQL API v4 |
142
+
143
+ ---
144
+
145
+ ## Supported Modalities
146
+
147
+ | Modality | Class | Default Normalisation |
148
+ |----------|-------|-----------------------|
149
+ | RNA expression | `RNAModality` | `detect_and_normalise()` (log1p) |
150
+ | DNA methylation | `MethylationModality` | M→beta conversion + clip |
151
+ | Copy number | `CNVModality` | log2 ratio, clipped [-2, 2] |
152
+ | Somatic mutations | `MutationModality` | Binarise at threshold |
153
+ | Protein abundance | `ProteinModality` | Z-score per protein |
154
+
155
+ ---
156
+
157
+ ## Documentation
158
+
159
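+ - [Quickstart guide](https://github.com/vi-c-ky/omicsync/blob/main/docs/quickstart.md)
160
+ - [API reference](https://github.com/vi-c-ky/omicsync/blob/main/docs/api_reference.md)
161
+ - [Tutorial: TCGA BRCA](https://github.com/vi-c-ky/omicsync/blob/main/docs/tutorials/tcga_brca.md)
162
+ - [Tutorial: Custom CSV data](https://github.com/vi-c-ky/omicsync/blob/main/docs/tutorials/custom_data.md)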
163
+
164
+ ---
165
+
166
+ ## Citation
167
+
168
+ If you use omicsync in your research, please cite:
169
+
170
+ > Paterson V. (2026). *omicsync: A Python library for multi-omics data harmonisation*. GitHub: github.com/vi-c-ky/omicsync
171
+
172
+ ---
173
+
174
+ ## Contributing
175
+
176
+ Contributions are welcome. Please open an issue or pull request on GitHub.
177
+
178
+ 1. Fork the repository
179
+ 2. Create a feature branch (`git checkout -b feature/my-feature`)
180
+ 3. Write tests for new functionality
181
+ 4. Run the test suite (`pytest tests/`)
182
+ 5. Open a pull request
183
+
184
+ ---
185
+
186
+ ## License
187
+
188
+ MIT — see [LICENSE](LICENSE) for details.
@@ -0,0 +1,120 @@
1
+ # omicsync
2
+
3
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
4
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
5
+ [![PyPI version](https://img.shields.io/pypi/v/omicsync.svg)](https://pypi.org/project/omicsync/)
6
+
7
+ **A Python library for multi-omics data harmonisation.**
8
+
9
+ omicsync handles the tedious work of aligning sample IDs, normalising each modality consistently, and exporting to downstream tools so you can focus on biology, not data wrangling.
10
+
11
+ ---
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ pip install omicsync
17
+ ```
18
+
19
+ With optional extras:
20
+
21
+ ```bash
22
+ pip install "omicsync[mofa]" # MOFA2 factor analysis
23
+ pip install "omicsync[geo]" # GEO data loading
24
+ pip install "omicsync[anndata]" # AnnData export
25
+ pip install "omicsync[torch]" # PyTorch tensor export
26
+ pip install "omicsync[all]" # Everything
27
+ ```
28
+
29
+ ---
30
+
31
+ ## Quick Start
32
+
33
+ ```python
34
+ import omicsync as oms
35
+ from omicsync.loaders.csv import load_multimodal_csv
36
+
37
+ # Load multiple modalities from CSV files
38
+ dataset = load_multimodal_csv({
39
+ "rna": "brca_rna.tsv",
40
+ "protein": "brca_rppa.tsv",
41
+ "cnv": "brca_cnv.tsv",
42
+ }, study_id="TCGA-BRCA")
43
+
44
+ # Align, normalise, filter — all chainable
45
+ dataset.align_samples().normalize().filter_features(min_variance=0.01)
46
+
47
+ # Export to DataFrame or MOFA2
48
+ df = dataset.to_dataframe() # samples × features, prefixed columns
49
+ mofa_input = dataset.to_mofa2() # dict ready for mofapy2 entry_point
50
+ ```
51
+
52
+ ---
53
+
54
+ ## Features
55
+
56
+ - **Sample harmonisation** — TCGA barcode parsing, fuzzy ID matching, coverage reporting
57
+ - **Per-modality normalisation** — auto-detection of count/TPM/M-value formats
58
+ - **Chainable API** — `dataset.align_samples().normalize().filter_features()`
59
+ - **sklearn compatibility** — use `OmicsSyncTransformer` in a `Pipeline`
60
+ - **Multiple export formats** — DataFrame, dict, MOFA2, PyTorch tensor, AnnData
61
+ - **Open Targets integration** — query target-disease associations via GraphQL
62
+ - **Type hints throughout** — fully typed public API
63
+
64
+ ---
65
+
66
+ ## Supported Data Sources
67
+
68
+ | Source | Loader | Notes |
69
+ |--------|--------|-------|
70
+ | TCGA | `load_tcga_files()` | Local files; barcode auto-harmonisation |
71
+ | GEO | `load_geo()` | Via GEOparse; requires `omicsync[geo]` |
72
+ | CSV/TSV | `load_csv()` | Any tabular file |
73
+ | Open Targets | `load_open_targets_targets()` | GraphQL API v4 |
74
+
75
+ ---
76
+
77
+ ## Supported Modalities
78
+
79
+ | Modality | Class | Default Normalisation |
80
+ |----------|-------|-----------------------|
81
+ | RNA expression | `RNAModality` | `detect_and_normalise()` (log1p) |
82
+ | DNA methylation | `MethylationModality` | M→beta conversion + clip |
83
+ | Copy number | `CNVModality` | log2 ratio, clipped [-2, 2] |
84
+ | Somatic mutations | `MutationModality` | Binarise at threshold |
85
+ | Protein abundance | `ProteinModality` | Z-score per protein |
86
+
87
+ ---
88
+
89
+ ## Documentation
90
+
91
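+ - [Quickstart guide](https://github.com/vi-c-ky/omicsync/blob/main/docs/quickstart.md)
91
+ - [API reference](https://github.com/vi-c-ky/omicsync/blob/main/docs/api_reference.md)
92
+ - [Tutorial: TCGA BRCA](https://github.com/vi-c-ky/omicsync/blob/main/docs/tutorials/tcga_brca.md)
93
+ - [Tutorial: Custom CSV data](https://github.com/vi-c-ky/omicsync/blob/main/docs/tutorials/custom_data.md)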
95
+
96
+ ---
97
+
98
+ ## Citation
99
+
100
+ If you use omicsync in your research, please cite:
101
+
102
+ > Paterson V. (2026). *omicsync: A Python library for multi-omics data harmonisation*. GitHub: github.com/vi-c-ky/omicsync
103
+
104
+ ---
105
+
106
+ ## Contributing
107
+
108
+ Contributions are welcome. Please open an issue or pull request on GitHub.
109
+
110
+ 1. Fork the repository
111
+ 2. Create a feature branch (`git checkout -b feature/my-feature`)
112
+ 3. Write tests for new functionality
113
+ 4. Run the test suite (`pytest tests/`)
114
+ 5. Open a pull request
115
+
116
+ ---
117
+
118
+ ## License
119
+
120
+ MIT — see [LICENSE](LICENSE) for details.
@@ -0,0 +1,33 @@
1
+ """omicsync — Multi-omics data harmonisation for Python."""
2
+
3
+ from omicsync.core.dataset import OmicsDataset
4
+ from omicsync.core.modality import (
5
+ OmicsModality,
6
+ RNAModality,
7
+ MutationModality,
8
+ MethylationModality,
9
+ CNVModality,
10
+ ProteinModality,
11
+ make_modality,
12
+ )
13
+ from omicsync.core.sample_index import SampleIndex
14
+ from omicsync.utils.logging import set_verbose, get_logger
15
+
16
+ __version__ = "0.1.0"
17
+ __author__ = "Paterson V."
18
+ __license__ = "MIT"
19
+
20
+ __all__ = [
21
+ "__version__",
22
+ "OmicsDataset",
23
+ "OmicsModality",
24
+ "RNAModality",
25
+ "MutationModality",
26
+ "MethylationModality",
27
+ "CNVModality",
28
+ "ProteinModality",
29
+ "make_modality",
30
+ "SampleIndex",
31
+ "set_verbose",
32
+ "get_logger",
33
+ ]
@@ -0,0 +1,25 @@
1
+ """Core data structures for omicsync."""
2
+
3
+ from omicsync.core.dataset import OmicsDataset
4
+ from omicsync.core.modality import (
5
+ OmicsModality,
6
+ RNAModality,
7
+ MutationModality,
8
+ MethylationModality,
9
+ CNVModality,
10
+ ProteinModality,
11
+ make_modality,
12
+ )
13
+ from omicsync.core.sample_index import SampleIndex
14
+
15
+ __all__ = [
16
+ "OmicsDataset",
17
+ "OmicsModality",
18
+ "RNAModality",
19
+ "MutationModality",
20
+ "MethylationModality",
21
+ "CNVModality",
22
+ "ProteinModality",
23
+ "make_modality",
24
+ "SampleIndex",
25
+ ]