biomedical-knowledge-lookup 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. biomedical_knowledge_lookup-1.0.0/LICENSE +21 -0
  2. biomedical_knowledge_lookup-1.0.0/PKG-INFO +289 -0
  3. biomedical_knowledge_lookup-1.0.0/README.md +245 -0
  4. biomedical_knowledge_lookup-1.0.0/pyproject.toml +133 -0
  5. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/__init__.py +112 -0
  6. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/__main__.py +213 -0
  7. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/__init__.py +123 -0
  8. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/additional_adapters.py +340 -0
  9. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/biolinker_adapter.py +526 -0
  10. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/bioontology_adapter.py +443 -0
  11. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/bioportal_adapter.py +210 -0
  12. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/chembl_adapter.py +806 -0
  13. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/clinvar_adapter.py +159 -0
  14. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/cosmic_adapter.py +134 -0
  15. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/dbpedia_adapter.py +184 -0
  16. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/disgenet_adapter.py +274 -0
  17. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/drugbank_adapter.py +135 -0
  18. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/ebiols_adapter.py +16 -0
  19. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/ensembl_adapter.py +105 -0
  20. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/europepmc_adapter.py +138 -0
  21. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/eutils_adapter.py +297 -0
  22. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/geneontology_adapter.py +109 -0
  23. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/hgnc_adapter.py +153 -0
  24. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/hpo_adapter.py +127 -0
  25. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/interpro_adapter.py +134 -0
  26. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/kegg_adapter.py +142 -0
  27. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/mondo_adapter.py +141 -0
  28. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/obofoundry_adapter.py +84 -0
  29. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/ols_adapter.py +213 -0
  30. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/omim_adapter.py +139 -0
  31. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/opentargets_adapter.py +177 -0
  32. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/oxo_adapter.py +383 -0
  33. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/pdb_adapter.py +138 -0
  34. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/pfam_adapter.py +124 -0
  35. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/pubchem_adapter.py +103 -0
  36. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/quickgo_adapter.py +207 -0
  37. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/reactome_adapter.py +137 -0
  38. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/string_adapter.py +136 -0
  39. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/tyto_adapter.py +63 -0
  40. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/umls_adapter.py +190 -0
  41. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/unichem_adapter.py +735 -0
  42. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/uniprot_adapter.py +129 -0
  43. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/wikidata_adapter.py +214 -0
  44. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/adapters/zooma_adapter.py +91 -0
  45. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/base.py +227 -0
  46. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/benchmarking.py +178 -0
  47. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/cache.py +628 -0
  48. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/cache_demo.py +178 -0
  49. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/central_lookup.py +1148 -0
  50. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/examples.py +398 -0
  51. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/exports.py +118 -0
  52. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/factory.py +23 -0
  53. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/generated_models/__init__.py +1 -0
  54. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/generated_models/biomedical_knowledge_models.py +975 -0
  55. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/generated_models/models.py +0 -0
  56. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/helpers.py +81 -0
  57. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/models.py +375 -0
  58. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/multi_source_annotator.py +743 -0
  59. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/ontology_concept_loader.py +261 -0
  60. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/rdf_converter.py +825 -0
  61. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/CROSSWALK_CLIENT_README.md +507 -0
  62. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/README.md +630 -0
  63. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/auth.py +189 -0
  64. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/client.py +67 -0
  65. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/code_lookup_client.py +606 -0
  66. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/concept_lookup_client.py +567 -0
  67. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/concepts.py +289 -0
  68. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/crosswalk_client.py +565 -0
  69. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/enhanced_search_client.py +758 -0
  70. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/hierarchy_walker_client.py +676 -0
  71. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/main_client.py +318 -0
  72. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/metadata.py +66 -0
  73. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/models.py +60 -0
  74. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/names_retrieval_client.py +667 -0
  75. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/search.py +170 -0
  76. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/semantic_types_client.py +585 -0
  77. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/string_concept_client.py +585 -0
  78. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/tests/__init__.py +15 -0
  79. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/tests/integration/__init__.py +7 -0
  80. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/tests/integration/test_umls_client_integration.py +499 -0
  81. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/tests/test_runner.py +265 -0
  82. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/tests/unit/__init__.py +7 -0
  83. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/tests/unit/test_code_lookup_client.py +458 -0
  84. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/umls/tests/unit/test_semantic_types_client.py +387 -0
  85. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/utils/retry_utils.py +235 -0
  86. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/utils/timing_utils.py +453 -0
  87. biomedical_knowledge_lookup-1.0.0/src/knowledge_lookup/validation_models.py +218 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Jonas Heinicke
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,289 @@
1
+ Metadata-Version: 2.4
2
+ Name: biomedical-knowledge-lookup
3
+ Version: 1.0.0
4
+ Summary: Unified biological concept lookup across 29+ biomedical knowledge sources including BioPortal, OLS, UMLS, ChEMBL, DisGeNET, and more
5
+ License: MIT
6
+ License-File: LICENSE
7
+ Keywords: biomedical,ontology,knowledge-graph,bioinformatics,semantic-web,rdf,bioportal,ols,umls
8
+ Author: Jonas Heinicke
9
+ Author-email: jonas.heinicke@helmholtz-hzi.de
10
+ Requires-Python: >=3.10,<4.0
11
+ Classifier: Development Status :: 5 - Production/Stable
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Programming Language :: Python :: 3.14
21
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
22
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
23
+ Requires-Dist: aiohttp (>=3.9.0,<4.0.0)
24
+ Requires-Dist: backoff (>=2.2.0,<3.0.0)
25
+ Requires-Dist: bioservices (>=1.16.0,<2.0.0)
26
+ Requires-Dist: chembl-webresource-client (>=0.10.9,<0.11.0)
27
+ Requires-Dist: pandas (>=2.0.0,<3.0.0)
28
+ Requires-Dist: pydantic (>=2.0.0,<3.0.0)
29
+ Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
30
+ Requires-Dist: rdflib (>=7.0.0,<8.0.0)
31
+ Requires-Dist: requests (>=2.31.0,<3.0.0)
32
+ Requires-Dist: rich (>=13.0.0,<14.0.0)
33
+ Requires-Dist: setuptools (>=82.0.1,<83.0.0)
34
+ Requires-Dist: tenacity (>=8.2.0,<9.0.0)
35
+ Requires-Dist: typer (>=0.12.0,<1.0.0)
36
+ Requires-Dist: tyto (>=1.4,<2.0)
37
+ Project-URL: Changelog, https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/blob/main/CHANGELOG.md
38
+ Project-URL: Documentation, https://jonasheinickeBio.github.io/biomedical-knowledge-lookup/
39
+ Project-URL: Homepage, https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup
40
+ Project-URL: Issue Tracker, https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/issues
41
+ Project-URL: Repository, https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup
42
+ Description-Content-Type: text/markdown
43
+
44
+ # 🧬 Biomedical Knowledge Lookup
45
+
46
+ [![PyPI version](https://badge.fury.io/py/biomedical-knowledge-lookup.svg)](https://pypi.org/project/biomedical-knowledge-lookup/)
47
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
48
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
49
+ [![Tests](https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/actions/workflows/tests.yml/badge.svg)](https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/actions/workflows/tests.yml)
50
+ [![Coverage](https://img.shields.io/codecov/c/github/JonasHeinickeBio/biomedical-knowledge-lookup)](https://codecov.io/gh/JonasHeinickeBio/biomedical-knowledge-lookup)
51
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
52
+ [![Documentation](https://img.shields.io/badge/docs-latest-brightgreen.svg)](https://jonasheinickeBio.github.io/biomedical-knowledge-lookup/)
53
+ [![PyPI downloads](https://img.shields.io/pypi/dm/biomedical-knowledge-lookup?color=blue)](https://pypi.org/project/biomedical-knowledge-lookup/)
54
+ [![GitHub last commit](https://img.shields.io/github/last-commit/JonasHeinickeBio/biomedical-knowledge-lookup)](https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/commits/main)
55
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1234567.svg)](https://doi.org/10.5281/zenodo.1234567)
56
+
57
+ A unified Python library for biological concept lookup across **29+ biomedical knowledge sources** including BioPortal, OLS, UMLS, ChEMBL, DisGeNET, and more. Built for bioinformatics researchers, knowledge graph developers, and biomedical data scientists.
58
+
59
+ ## ✨ Features
60
+
61
+ - **🔍 29+ Knowledge Sources**: Comprehensive coverage of biomedical ontologies and databases
62
+ - **⚡ Unified API**: Single interface for all sources with consistent results
63
+ - **🔄 Multi-source Annotation**: Cross-reference concepts across multiple databases
64
+ - **📊 RDF Export**: Convert results to RDF format for knowledge graphs
65
+ - **💾 Intelligent Caching**: Built-in caching system for performance optimization
66
+ - **🔄 Async Support**: Asynchronous operations for scalable applications
67
+ - **🧪 Comprehensive Testing**: Full test suite with unit and integration tests
68
+ - **📚 Rich Documentation**: Extensive examples and API documentation
69
+
70
+ ## 🚀 Quick Start
71
+
72
+ ### Installation
73
+
74
+ ```bash
75
+ pip install biomedical-knowledge-lookup
76
+ # or
77
+ poetry add biomedical-knowledge-lookup
78
+ # or from source
79
+ git clone https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup.git
80
+ cd biomedical-knowledge-lookup
81
+ poetry install
82
+ ```
83
+
84
+ ### Basic Usage
85
+
86
+ ```python
87
+ from knowledge_lookup import CentralKnowledgeLookup, KnowledgeSource
88
+
89
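+ # Initialize the lookup system (run this snippet inside an `async def` function or an async REPL, since the calls below use `await`)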
90
+ lookup = CentralKnowledgeLookup()
91
+
92
+ # Search for concepts across multiple sources
93
+ results = await lookup.search_concepts(
94
+ "diabetes mellitus",
95
+ sources=[KnowledgeSource.BIOPORTAL, KnowledgeSource.OLS, KnowledgeSource.UMLS]
96
+ )
97
+
98
+ # Get detailed information about a specific concept
99
+ concept_details = await lookup.get_concept_details("DOID:9351")
100
+
101
+ # Export results to RDF
102
+ rdf_graph = lookup.export_to_rdf(results)
103
+ ```
104
+
105
+ ### Advanced Usage with Multi-source Annotation
106
+
107
+ ```python
108
+ from knowledge_lookup import MultiSourceAnnotator
109
+
110
+ # Annotate text with concepts from multiple sources
111
+ annotator = MultiSourceAnnotator()
112
+ annotations = await annotator.annotate_text(
113
+ "Type 2 diabetes is associated with insulin resistance",
114
+ confidence_threshold=0.7
115
+ )
116
+
117
+ # Get consensus annotations across sources
118
+ consensus = annotator.get_consensus_annotations(annotations)
119
+ ```
120
+
121
+ ## 📋 Supported Knowledge Sources
122
+
123
+ | Source | Description | API Key Required |
124
+ |--------|-------------|------------------|
125
+ | **BioPortal** | NCBO BioPortal ontology repository | Yes |
126
+ | **OLS** | Ontology Lookup Service | No |
127
+ | **UMLS** | Unified Medical Language System | Yes |
128
+ | **ChEMBL** | Chemical database | No |
129
+ | **DisGeNET** | Disease-gene associations | No |
130
+ | **DrugBank** | Drug information database | No |
131
+ | **Ensembl** | Genome annotation database | No |
132
+ | **Gene Ontology** | Molecular function/process/component | No |
133
+ | **HPO** | Human Phenotype Ontology | No |
134
+ | **Mondo** | Mondo Disease Ontology | No |
135
+ | **OpenTargets** | Target-disease associations | No |
136
+ | **PubChem** | Chemical information | No |
137
+ | **Reactome** | Pathway database | No |
138
+ | **UniProt** | Protein sequence database | No |
139
+ | **WikiData** | Structured knowledge base | No |
140
+ | **ZOOMA** | Ontology mapping service | No |
141
+ | **And 13+ more...** | See full list in documentation | Varies |
142
+
143
+ ## 🏗️ Architecture
144
+
145
+ ```
146
+ knowledge_lookup/
147
+ ├── adapters/ # Individual source adapters
148
+ ├── models.py # Data models and enums
149
+ ├── central_lookup.py # Main lookup coordinator
150
+ ├── multi_source_annotator.py # Cross-source annotation
151
+ ├── rdf_converter.py # RDF export utilities
152
+ ├── cache.py # Caching system
153
+ └── base.py # Abstract base classes
154
+ ```
155
+
156
+ ## 📖 Documentation
157
+
158
+ - **[Getting Started Guide](docs/getting_started.md)**
159
+ - **[API Reference](docs/api_reference.md)**
160
+ - **[Adapter Documentation](docs/adapters/)**
161
+ - **[Examples](examples/)**
162
+ - **[Contributing Guide](CONTRIBUTING.md)**
163
+
164
+ ### Additional Resources
165
+
166
+ - **[Documentation Improvement Summary](https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/wiki/Documentation-Improvement-Summary)**
167
+ - **[Project Overview](https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/wiki/Project-Overview)**
168
+
169
+ ### Example Notebooks
170
+
171
+ Explore interactive examples in the `examples/` directory:
172
+ - Basic concept lookup
173
+ - Multi-source annotation
174
+ - RDF export and knowledge graph construction
175
+ - Performance benchmarking
176
+
177
+ ## 🔧 Configuration
178
+
179
+ ### API Keys
180
+
181
+ Some sources require API keys. Set them as environment variables:
182
+
183
+ ```bash
184
+ export BIOPORTAL_API_KEY="your_key_here"
185
+ export UMLS_API_KEY="your_key_here"
186
+ # ... etc
187
+ ```
188
+
189
+ Or create a `.env` file:
190
+
191
+ ```env
192
+ BIOPORTAL_API_KEY=your_key_here
193
+ UMLS_API_KEY=your_key_here
194
+ ```
195
+
196
+ ### Advanced Configuration
197
+
198
+ ```python
199
+ from knowledge_lookup import CentralKnowledgeLookup, KnowledgeSource, LookupConfig
200
+
201
+ config = LookupConfig(
202
+ rate_limits={
203
+ KnowledgeSource.BIOPORTAL: 10, # requests per second
204
+ KnowledgeSource.OLS: 20,
205
+ },
206
+ cache_enabled=True,
207
+ cache_dir="./cache"
208
+ )
209
+
210
+ lookup = CentralKnowledgeLookup(config)
211
+ ```
212
+
213
+ ## 🧪 Testing
214
+
215
+ ```bash
216
+ # Run all tests
217
+ poetry run pytest
218
+
219
+ # Run specific test categories
220
+ poetry run pytest -m "unit" # Unit tests only
221
+ poetry run pytest -m "integration" # Integration tests
222
+ poetry run pytest -m "not slow" # Skip slow tests
223
+
224
+ # Run with coverage
225
+ poetry run pytest --cov=knowledge_lookup
226
+ ```
227
+
228
+ ## 🤝 Contributing
229
+
230
+ We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details.
231
+
232
+ ### Adding New Adapters
233
+
234
+ 1. Subclass `KnowledgeSourceAdapter` (defined in `base.py`)
235
+ 2. Implement required methods: `search_concepts()`, `get_concept_details()`
236
+ 3. Add to `adapters/__init__.py`
237
+ 4. Add tests in `tests/unit/test_adapters/`
238
+ 5. Update documentation
239
+
240
+ ### Development Setup
241
+
242
+ ```bash
243
+ git clone https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup.git
244
+ cd biomedical-knowledge-lookup
245
+ poetry install
246
+ poetry run pre-commit install
247
+ ```
248
+
249
+ ## 📄 License
250
+
251
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
252
+
253
+ ## 🙏 Acknowledgments
254
+
255
+ - Built upon the AID-PAIS Knowledge Graph project
256
+ - Thanks to all contributors and the biomedical research community
257
+ - Special thanks to the maintainers of the various knowledge sources
258
+
259
+ ## 📞 Support
260
+
261
+ - **Issues**: [GitHub Issues](https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/issues)
262
+ - **Discussions**: [GitHub Discussions](https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/discussions)
263
+ - **Email**: jonas.heinicke@helmholtz-hzi.de
264
+
265
+ ## 🔬 Citation
266
+
267
+ If you use this library in your research, please cite:
268
+
269
+ ```bibtex
270
+ @software{heinicke_biomedical_knowledge_lookup_2025,
271
+ author = {Heinicke, Jonas},
272
+ title = {Biomedical Knowledge Lookup: Unified biological concept lookup across 29+ biomedical knowledge sources},
273
+ url = {https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup},
274
+ version = {1.0.0},
275
+ year = {2025}
276
+ }
277
+ ```
278
+
279
+ ---
280
+
281
+ <p align="center">
282
+ <img src="https://img.shields.io/github/stars/JonasHeinickeBio/biomedical-knowledge-lookup?style=social" alt="GitHub stars">
283
+ <img src="https://img.shields.io/github/forks/JonasHeinickeBio/biomedical-knowledge-lookup?style=social" alt="GitHub forks">
284
+ </p>
285
+
286
+ <p align="center">
287
+ <em>⭐ Star this repository if you find it useful!</em>
288
+ </p>
289
+
@@ -0,0 +1,245 @@
1
+ # 🧬 Biomedical Knowledge Lookup
2
+
3
+ [![PyPI version](https://badge.fury.io/py/biomedical-knowledge-lookup.svg)](https://pypi.org/project/biomedical-knowledge-lookup/)
4
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6
+ [![Tests](https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/actions/workflows/tests.yml/badge.svg)](https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/actions/workflows/tests.yml)
7
+ [![Coverage](https://img.shields.io/codecov/c/github/JonasHeinickeBio/biomedical-knowledge-lookup)](https://codecov.io/gh/JonasHeinickeBio/biomedical-knowledge-lookup)
8
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
9
+ [![Documentation](https://img.shields.io/badge/docs-latest-brightgreen.svg)](https://jonasheinickeBio.github.io/biomedical-knowledge-lookup/)
10
+ [![PyPI downloads](https://img.shields.io/pypi/dm/biomedical-knowledge-lookup?color=blue)](https://pypi.org/project/biomedical-knowledge-lookup/)
11
+ [![GitHub last commit](https://img.shields.io/github/last-commit/JonasHeinickeBio/biomedical-knowledge-lookup)](https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/commits/main)
12
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1234567.svg)](https://doi.org/10.5281/zenodo.1234567)
13
+
14
+ A unified Python library for biological concept lookup across **29+ biomedical knowledge sources** including BioPortal, OLS, UMLS, ChEMBL, DisGeNET, and more. Built for bioinformatics researchers, knowledge graph developers, and biomedical data scientists.
15
+
16
+ ## ✨ Features
17
+
18
+ - **🔍 29+ Knowledge Sources**: Comprehensive coverage of biomedical ontologies and databases
19
+ - **⚡ Unified API**: Single interface for all sources with consistent results
20
+ - **🔄 Multi-source Annotation**: Cross-reference concepts across multiple databases
21
+ - **📊 RDF Export**: Convert results to RDF format for knowledge graphs
22
+ - **💾 Intelligent Caching**: Built-in caching system for performance optimization
23
+ - **🔄 Async Support**: Asynchronous operations for scalable applications
24
+ - **🧪 Comprehensive Testing**: Full test suite with unit and integration tests
25
+ - **📚 Rich Documentation**: Extensive examples and API documentation
26
+
27
+ ## 🚀 Quick Start
28
+
29
+ ### Installation
30
+
31
+ ```bash
32
+ pip install biomedical-knowledge-lookup
33
+ # or
34
+ poetry add biomedical-knowledge-lookup
35
+ # or from source
36
+ git clone https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup.git
37
+ cd biomedical-knowledge-lookup
38
+ poetry install
39
+ ```
40
+
41
+ ### Basic Usage
42
+
43
+ ```python
44
+ from knowledge_lookup import CentralKnowledgeLookup, KnowledgeSource
45
+
46
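+ # Initialize the lookup system (run this snippet inside an `async def` function or an async REPL, since the calls below use `await`)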
47
+ lookup = CentralKnowledgeLookup()
48
+
49
+ # Search for concepts across multiple sources
50
+ results = await lookup.search_concepts(
51
+ "diabetes mellitus",
52
+ sources=[KnowledgeSource.BIOPORTAL, KnowledgeSource.OLS, KnowledgeSource.UMLS]
53
+ )
54
+
55
+ # Get detailed information about a specific concept
56
+ concept_details = await lookup.get_concept_details("DOID:9351")
57
+
58
+ # Export results to RDF
59
+ rdf_graph = lookup.export_to_rdf(results)
60
+ ```
61
+
62
+ ### Advanced Usage with Multi-source Annotation
63
+
64
+ ```python
65
+ from knowledge_lookup import MultiSourceAnnotator
66
+
67
+ # Annotate text with concepts from multiple sources
68
+ annotator = MultiSourceAnnotator()
69
+ annotations = await annotator.annotate_text(
70
+ "Type 2 diabetes is associated with insulin resistance",
71
+ confidence_threshold=0.7
72
+ )
73
+
74
+ # Get consensus annotations across sources
75
+ consensus = annotator.get_consensus_annotations(annotations)
76
+ ```
77
+
78
+ ## 📋 Supported Knowledge Sources
79
+
80
+ | Source | Description | API Key Required |
81
+ |--------|-------------|------------------|
82
+ | **BioPortal** | NCBO BioPortal ontology repository | Yes |
83
+ | **OLS** | Ontology Lookup Service | No |
84
+ | **UMLS** | Unified Medical Language System | Yes |
85
+ | **ChEMBL** | Chemical database | No |
86
+ | **DisGeNET** | Disease-gene associations | No |
87
+ | **DrugBank** | Drug information database | No |
88
+ | **Ensembl** | Genome annotation database | No |
89
+ | **Gene Ontology** | Molecular function/process/component | No |
90
+ | **HPO** | Human Phenotype Ontology | No |
91
+ | **Mondo** | Mondo Disease Ontology | No |
92
+ | **OpenTargets** | Target-disease associations | No |
93
+ | **PubChem** | Chemical information | No |
94
+ | **Reactome** | Pathway database | No |
95
+ | **UniProt** | Protein sequence database | No |
96
+ | **WikiData** | Structured knowledge base | No |
97
+ | **ZOOMA** | Ontology mapping service | No |
98
+ | **And 13+ more...** | See full list in documentation | Varies |
99
+
100
+ ## 🏗️ Architecture
101
+
102
+ ```
103
+ knowledge_lookup/
104
+ ├── adapters/ # Individual source adapters
105
+ ├── models.py # Data models and enums
106
+ ├── central_lookup.py # Main lookup coordinator
107
+ ├── multi_source_annotator.py # Cross-source annotation
108
+ ├── rdf_converter.py # RDF export utilities
109
+ ├── cache.py # Caching system
110
+ └── base.py # Abstract base classes
111
+ ```
112
+
113
+ ## 📖 Documentation
114
+
115
+ - **[Getting Started Guide](docs/getting_started.md)**
116
+ - **[API Reference](docs/api_reference.md)**
117
+ - **[Adapter Documentation](docs/adapters/)**
118
+ - **[Examples](examples/)**
119
+ - **[Contributing Guide](CONTRIBUTING.md)**
120
+
121
+ ### Additional Resources
122
+
123
+ - **[Documentation Improvement Summary](https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/wiki/Documentation-Improvement-Summary)**
124
+ - **[Project Overview](https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/wiki/Project-Overview)**
125
+
126
+ ### Example Notebooks
127
+
128
+ Explore interactive examples in the `examples/` directory:
129
+ - Basic concept lookup
130
+ - Multi-source annotation
131
+ - RDF export and knowledge graph construction
132
+ - Performance benchmarking
133
+
134
+ ## 🔧 Configuration
135
+
136
+ ### API Keys
137
+
138
+ Some sources require API keys. Set them as environment variables:
139
+
140
+ ```bash
141
+ export BIOPORTAL_API_KEY="your_key_here"
142
+ export UMLS_API_KEY="your_key_here"
143
+ # ... etc
144
+ ```
145
+
146
+ Or create a `.env` file:
147
+
148
+ ```env
149
+ BIOPORTAL_API_KEY=your_key_here
150
+ UMLS_API_KEY=your_key_here
151
+ ```
152
+
153
+ ### Advanced Configuration
154
+
155
+ ```python
156
+ from knowledge_lookup import CentralKnowledgeLookup, KnowledgeSource, LookupConfig
157
+
158
+ config = LookupConfig(
159
+ rate_limits={
160
+ KnowledgeSource.BIOPORTAL: 10, # requests per second
161
+ KnowledgeSource.OLS: 20,
162
+ },
163
+ cache_enabled=True,
164
+ cache_dir="./cache"
165
+ )
166
+
167
+ lookup = CentralKnowledgeLookup(config)
168
+ ```
169
+
170
+ ## 🧪 Testing
171
+
172
+ ```bash
173
+ # Run all tests
174
+ poetry run pytest
175
+
176
+ # Run specific test categories
177
+ poetry run pytest -m "unit" # Unit tests only
178
+ poetry run pytest -m "integration" # Integration tests
179
+ poetry run pytest -m "not slow" # Skip slow tests
180
+
181
+ # Run with coverage
182
+ poetry run pytest --cov=knowledge_lookup
183
+ ```
184
+
185
+ ## 🤝 Contributing
186
+
187
+ We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details.
188
+
189
+ ### Adding New Adapters
190
+
191
+ 1. Subclass `KnowledgeSourceAdapter` (defined in `base.py`)
192
+ 2. Implement required methods: `search_concepts()`, `get_concept_details()`
193
+ 3. Add to `adapters/__init__.py`
194
+ 4. Add tests in `tests/unit/test_adapters/`
195
+ 5. Update documentation
196
+
197
+ ### Development Setup
198
+
199
+ ```bash
200
+ git clone https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup.git
201
+ cd biomedical-knowledge-lookup
202
+ poetry install
203
+ poetry run pre-commit install
204
+ ```
205
+
206
+ ## 📄 License
207
+
208
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
209
+
210
+ ## 🙏 Acknowledgments
211
+
212
+ - Built upon the AID-PAIS Knowledge Graph project
213
+ - Thanks to all contributors and the biomedical research community
214
+ - Special thanks to the maintainers of the various knowledge sources
215
+
216
+ ## 📞 Support
217
+
218
+ - **Issues**: [GitHub Issues](https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/issues)
219
+ - **Discussions**: [GitHub Discussions](https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/discussions)
220
+ - **Email**: jonas.heinicke@helmholtz-hzi.de
221
+
222
+ ## 🔬 Citation
223
+
224
+ If you use this library in your research, please cite:
225
+
226
+ ```bibtex
227
+ @software{heinicke_biomedical_knowledge_lookup_2025,
228
+ author = {Heinicke, Jonas},
229
+ title = {Biomedical Knowledge Lookup: Unified biological concept lookup across 29+ biomedical knowledge sources},
230
+ url = {https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup},
231
+ version = {1.0.0},
232
+ year = {2025}
233
+ }
234
+ ```
235
+
236
+ ---
237
+
238
+ <p align="center">
239
+ <img src="https://img.shields.io/github/stars/JonasHeinickeBio/biomedical-knowledge-lookup?style=social" alt="GitHub stars">
240
+ <img src="https://img.shields.io/github/forks/JonasHeinickeBio/biomedical-knowledge-lookup?style=social" alt="GitHub forks">
241
+ </p>
242
+
243
+ <p align="center">
244
+ <em>⭐ Star this repository if you find it useful!</em>
245
+ </p>
@@ -0,0 +1,133 @@
1
+ [tool.poetry]
2
+ name = "biomedical-knowledge-lookup"
3
+ version = "1.0.0"
4
+ description = "Unified biological concept lookup across 29+ biomedical knowledge sources including BioPortal, OLS, UMLS, ChEMBL, DisGeNET, and more"
5
+ authors = ["Jonas Heinicke <jonas.heinicke@helmholtz-hzi.de>"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+ homepage = "https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup"
9
+ repository = "https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup"
10
+ keywords = [
11
+ "biomedical",
12
+ "ontology",
13
+ "knowledge-graph",
14
+ "bioinformatics",
15
+ "semantic-web",
16
+ "rdf",
17
+ "bioportal",
18
+ "ols",
19
+ "umls",
20
+ ]
21
+ classifiers = [
22
+ "Development Status :: 5 - Production/Stable",
23
+ "Intended Audience :: Science/Research",
24
+ "License :: OSI Approved :: MIT License",
25
+ "Operating System :: OS Independent",
26
+ "Programming Language :: Python :: 3",
27
+ "Programming Language :: Python :: 3.10",
28
+ "Programming Language :: Python :: 3.11",
29
+ "Programming Language :: Python :: 3.12",
30
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
31
+ "Topic :: Scientific/Engineering :: Information Analysis",
32
+ ]
33
+ packages = [{ include = "knowledge_lookup", from = "src" }]
34
+
35
+ [tool.poetry.urls]
36
+ "Documentation" = "https://jonasheinickeBio.github.io/biomedical-knowledge-lookup/"
37
+ "Issue Tracker" = "https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/issues"
38
+ "Changelog" = "https://github.com/JonasHeinickeBio/biomedical-knowledge-lookup/blob/main/CHANGELOG.md"
39
+
40
+ [tool.poetry.scripts]
41
+ knowledge-lookup = "knowledge_lookup.__main__:app"
42
+
43
+ [tool.poetry.dependencies]
44
+ python = "^3.10"
45
+ aiohttp = "^3.9.0"
46
+ requests = "^2.31.0"
47
+ rdflib = "^7.0.0"
48
+ pandas = "^2.0.0"
49
+ pydantic = "^2.0.0"
50
+ tenacity = "^8.2.0"
51
+ backoff = "^2.2.0"
52
+ python-dotenv = "^1.0.0"
53
+ rich = "^13.0.0"
54
+ typer = ">=0.12.0,<1.0.0"
55
+ chembl-webresource-client = "^0.10.9"
56
+ tyto = "^1.4"
57
+ bioservices = "^1.16.0"
58
+ setuptools = "^82.0.1"
59
+
60
+ [tool.poetry.group.dev.dependencies]
61
+ pytest = "^8.0.0"
62
+ pytest-asyncio = "^0.21.0"
63
+ pytest-cov = "^4.0.0"
64
+ pytest-timeout = "^2.2.0"
65
+ ruff = "^0.1.0"
66
+ mypy = "^1.0.0"
67
+ jupyter = "^1.0.0"
68
+ ipykernel = "^6.0.0"
69
+ pre-commit = "^4.5.1"
70
+ linkml = "^1.8.0"
71
+ linkml-runtime = "^1.8.0"
72
+
73
+ [tool.poetry.group.docs.dependencies]
74
+ sphinx = "^7.0.0"
75
+ sphinx-rtd-theme = "^2.0.0"
76
+
77
+ [build-system]
78
+ requires = ["poetry-core"]
79
+ build-backend = "poetry.core.masonry.api"
80
+
81
+ [line-length]
82
+ line-length = 99
83
+
84
+ [tool.ruff]
85
+ line-length = 99
86
+ target-version = "py310"
87
+
88
+ [tool.ruff.lint]
89
+ select = [
90
+ "E", # pycodestyle errors
91
+ "W", # pycodestyle warnings
92
+ "F", # pyflakes
93
+ "I", # isort
94
+ "B", # flake8-bugbear
95
+ "C4", # flake8-comprehensions
96
+ "UP", # pyupgrade
97
+ ]
98
+ ignore = [
99
+ "B008", # do not perform function calls in argument defaults
100
+ "C901", # too complex
101
+ ]
102
+
103
+ [tool.pytest.ini_options]
104
+ addopts = "-ra -q --strict-markers"
105
+ testpaths = ["tests"]
106
+ pythonpath = ["src"]
107
+ markers = [
108
+ "slow: marks tests as slow (deselect with '-m \"not slow\"')",
109
+ "integration: marks tests as integration tests",
110
+ "unit: marks tests as unit tests",
111
+ "network: marks tests that require network access",
112
+ "functional: marks tests as functional adapter tests",
113
+ "api: marks tests that require API key authentication",
114
+ ]
115
+
116
+ [tool.mypy]
117
+ python_version = "3.10"
118
+ ignore_missing_imports = true
119
+ omit = ["*/tests/*", "*/test_*.py", "*/__pycache__/*", "*/site-packages/*"]
120
+
121
+ [tool.coverage.report]
122
+ exclude_lines = [
123
+ "pragma: no cover",
124
+ "def __repr__",
125
+ "raise AssertionError",
126
+ "raise NotImplementedError",
127
+ "if __name__ == .__main__.:",
128
+ "if TYPE_CHECKING:",
129
+ "class .*\\bProtocol\\):",
130
+ "@(abc\\.)?abstractmethod",
131
+ ]
132
+ precision = 2
133
+ show_missing = true