ddharmon 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddharmon-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Trent Leslie
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,316 @@
1
+ Metadata-Version: 2.3
2
+ Name: ddharmon
3
+ Version: 0.1.0
4
+ Summary: Python client for the BioMapper2 API — map biological entities to standardized knowledge graph identifiers
5
+ License: MIT
6
+ Keywords: bioinformatics,metabolomics,knowledge-graph,biomapper,multi-omics
7
+ Author: Trent Leslie
8
+ Author-email: trent@phenomehealth.org
9
+ Requires-Python: >=3.11,<4.0
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
18
+ Provides-Extra: all
19
+ Provides-Extra: metabolon
20
+ Provides-Extra: notebook
21
+ Requires-Dist: httpx (>=0.27,<0.28)
22
+ Requires-Dist: pydantic (>=2.0,<3.0)
23
+ Requires-Dist: python-dotenv (>=1.0,<2.0)
24
+ Project-URL: Documentation, https://github.com/trentleslie/ddharmon#readme
25
+ Project-URL: Homepage, https://github.com/trentleslie/ddharmon
26
+ Project-URL: Repository, https://github.com/trentleslie/ddharmon
27
+ Description-Content-Type: text/markdown
28
+
29
+ # ddharmon
30
+
31
+ Python client for the **BioMapper2 API** — map biological entity names to
32
+ standardized knowledge-graph identifiers (CHEBI, HMDB, PubChem, RefMet, and more).
33
+
34
+ ```python
35
+ from ddharmon import map_entity
36
+
37
+ result = map_entity("L-Histidine")
38
+ print(result.primary_curie) # RM:0129894
39
+ print(result.confidence_tier) # high
40
+ print(result.ids_for("CHEBI")) # ['15971']
41
+ ```
42
+
43
+ ---
44
+
45
+ ## Installation
46
+
47
+ ```bash
48
+ # Core (async HTTP client + Pydantic models)
49
+ pip install ddharmon
50
+
51
+ # With Metabolon preprocessing utilities (pandas, openpyxl)
52
+ pip install "ddharmon[metabolon]"
53
+
54
+ # With notebook progress bars (tqdm, nest-asyncio)
55
+ pip install "ddharmon[notebook]"
56
+
57
+ # Everything
58
+ pip install "ddharmon[all]"
59
+ ```
60
+
61
+ ---
62
+
63
+ ## Getting an API key
64
+
65
+ The BioMapper2 API requires an API key. To request access, email
66
+ [trent.leslie@phenomehealth.org](mailto:trent.leslie@phenomehealth.org).
67
+
68
+ Once you have a key, set it in your environment:
69
+ ```bash
70
+ export BIOMAPPER_API_KEY=your-key-here
71
+ ```
72
+
73
+ Or add it to a `.env` file in your project root:
74
+ ```
75
+ BIOMAPPER_API_KEY=your-key-here
76
+ ```
77
+
78
+ ddharmon will pick it up automatically from either location.
79
+
80
+ ---
81
+
82
+ ## Quick start
83
+
84
+ ### Single lookup (synchronous)
85
+
86
+ ```python
87
+ from ddharmon import map_entity
88
+
89
+ result = map_entity("L-Histidine")
90
+
91
+ print(result.resolved) # True
92
+ print(result.primary_curie) # RM:0129894
93
+ print(result.chosen_kg_id) # CHEBI:15971
94
+ print(result.confidence_score) # 2.489
95
+ print(result.confidence_tier) # high (≥2.0)
96
+ print(result.ids_for("CHEBI")) # ['15971']
97
+ print(result.ids_for("refmet_id")) # ['RM0129894']
98
+ ```
99
+
100
+ ### Batch mapping (synchronous)
101
+
102
+ ```python
103
+ from ddharmon import map_entities, summarize
104
+
105
+ records = [
106
+ {"name": "L-Histidine"},
107
+ {"name": "Glucose", "identifiers": {"HMDB": "HMDB00122"}},
108
+ {"name": "Sphinganine"},
109
+ ]
110
+
111
+ results = map_entities(records, progress=True) # tqdm bar with [notebook]
112
+ summary = summarize(results)
113
+
114
+ print(f"{summary.resolved}/{summary.total_queried} resolved")
115
+ print(f"Resolution rate: {summary.resolution_rate:.1%}")
116
+ print(summary.vocabulary_coverage)
117
+ ```
118
+
119
+ ### Async usage
120
+
121
+ ```python
122
+ import asyncio
123
+ from ddharmon import BioMapperClient
124
+
125
+ async def main() -> None:
126
+     async with BioMapperClient() as client:
127
+         # Verify connectivity
128
+         health = await client.health_check()
129
+         print(health) # {'status': 'healthy', ...}
130
+
131
+         # Single
132
+         result = await client.map_entity(
133
+             "L-Histidine",
134
+             identifiers={"HMDB": "HMDB00177"},
135
+         )
136
+
137
+         # Batch with rate limiting
138
+         results = await client.map_entities(
139
+             [{"name": "L-Histidine"}, {"name": "Glucose"}],
140
+             rate_limit_delay=0.3,
141
+             progress=True,
142
+         )
143
+
144
+ asyncio.run(main())
145
+ ```
146
+
147
+ ### Jupyter notebooks
148
+
149
+ Apply `nest_asyncio` before using sync helpers inside a running event loop:
150
+
151
+ ```python
152
+ import nest_asyncio
153
+ nest_asyncio.apply() # required in Jupyter
154
+
155
+ from ddharmon import map_entities
156
+ results = map_entities([{"name": "L-Histidine"}], progress=True)
157
+ ```
158
+
159
+ ---
160
+
161
+ ## Metabolon extras
162
+
163
+ The `ddharmon[metabolon]` extra ships helpers that replicate and generalize the
164
+ preprocessing from the BioVector-eval Metabolon tutorial notebook.
165
+
166
+ ```python
167
+ import pandas as pd
168
+ from ddharmon import map_entities, summarize
169
+ from ddharmon.extras.metabolon import (
170
+ build_mapping_queue,
171
+ clean_compound_name,
172
+ extract_hmdb_id,
173
+ )
174
+ from ddharmon.extras.metabolon.export import save_results, results_to_dataframe
175
+
176
+ # 1. Load your Metabolon features spreadsheet
177
+ df = pd.read_excel("Metabolon_unknown_combined_features_metadata.xlsx")
178
+
179
+ # 2. Build a deduplicated mapping queue
180
+ # - cleans compound names (strips quotes, _CE## suffixes)
181
+ # - extracts HMDB hints from ms1_compound_name
182
+ # - deduplicates by cleaned name, tracking all feature_ids
183
+ queue = build_mapping_queue(
184
+ df,
185
+ name_col="matched_name",
186
+ hint_col="ms1_compound_name",
187
+ limit=50, # set to None for full run
188
+ )
189
+
190
+ print(f"{len(queue)} unique names to map")
191
+ print(f" with HMDB hints: {sum(1 for r in queue if r.hmdb_hint)}")
192
+
193
+ # 3. Map (convert queue → API records first)
194
+ results = map_entities(
195
+ [r.as_api_record() for r in queue],
196
+ rate_limit_delay=0.3,
197
+ progress=True,
198
+ )
199
+
200
+ # 4. Summarize
201
+ summary = summarize(results)
202
+ print(f"Resolution rate: {summary.resolution_rate:.1%}")
203
+
204
+ # 5. Export
205
+ save_results(
206
+ results,
207
+ summary=summary,
208
+ json_path="output/mapping.json",
209
+ tsv_path="output/mapping.tsv",
210
+ )
211
+
212
+ # Or work directly in pandas
213
+ result_df = results_to_dataframe(results)
214
+ print(result_df[["query_name", "primary_curie", "confidence_tier"]].head())
215
+ ```
216
+
217
+ ### Preprocessing functions
218
+
219
+ ```python
220
+ from ddharmon.extras.metabolon import clean_compound_name, extract_hmdb_id
221
+
222
+ # Strip quotes and collision-energy suffixes
223
+ clean_compound_name('"1,3-Diphenylguanidine_CE45"') # '1,3-Diphenylguanidine'
224
+ clean_compound_name('"4,6-DIOXOHEPTANOIC ACID"') # '4,6-DIOXOHEPTANOIC ACID'
225
+ clean_compound_name('L-Histidine') # 'L-Histidine' (unchanged)
226
+
227
+ # Extract HMDB accessions from ms1_compound_name format
228
+ extract_hmdb_id('HMDB:HMDB03349-2257 L-Dihydroorotic acid') # 'HMDB03349'
229
+ extract_hmdb_id('HMDB00177') # 'HMDB00177'
230
+ extract_hmdb_id(None) # None
231
+ ```
232
+
233
+ ---
234
+
235
+ ## API reference
236
+
237
+ ### `MappingResult`
238
+
239
+ | Attribute | Type | Description |
240
+ |---|---|---|
241
+ | `query_name` | `str` | Name submitted to the API |
242
+ | `resolved` | `bool` | Whether any identifier was returned |
243
+ | `primary_curie` | `str \| None` | First CURIE in the response |
244
+ | `chosen_kg_id` | `str \| None` | Resolver-selected knowledge graph ID |
245
+ | `confidence_score` | `float \| None` | Highest score across annotators |
246
+ | `confidence_tier` | `str` | `"high"` (≥2.0) / `"medium"` (≥1.0 and <2.0) / `"low"` (<1.0) / `"unknown"` |
247
+ | `identifiers` | `dict[str, list[str]]` | Vocabulary → IDs, e.g. `{"CHEBI": ["15971"]}` |
248
+ | `hmdb_hint` | `str \| None` | HMDB hint passed in the request |
249
+ | `error` | `str \| None` | Error message if mapping failed |
250
+
251
+ ```python
252
+ result.ids_for("CHEBI") # ['15971']
253
+ result.ids_for("refmet_id") # ['RM0129894']
254
+ result.ids_for("PUBCHEM.COMPOUND") # []
255
+ ```
256
+
257
+ ### Confidence tiers
258
+
259
+ | Score | Tier | Recommended action |
260
+ |---|---|---|
261
+ | ≥ 2.0 | `high` | Accept without review |
262
+ | ≥ 1.0 and < 2.0 | `medium` | Quick sanity check |
263
+ | < 1.0 | `low` | Manual review recommended |
264
+ | `None` | `unknown` | No score returned (e.g. resolved via an HMDB hint) |
265
+
266
+ ### Error handling
267
+
268
+ ```python
269
+ from ddharmon import (
270
+ BioMapperError, # base class
271
+ BioMapperAuthError, # 401/403 — bad API key
272
+ BioMapperRateLimitError, # 429 — throttled
273
+ BioMapperServerError, # 5xx
274
+ BioMapperTimeoutError, # request timeout
275
+ BioMapperConfigError, # missing API key / bad config
276
+ )
277
+
278
+ try:
279
+     result = map_entity("Glucose")
280
+ except BioMapperRateLimitError as e:
281
+     print(f"Throttled. Retry after: {e.retry_after}s")
282
+ except BioMapperAuthError:
283
+     print("Check your BIOMAPPER_API_KEY")
284
+ ```
285
+
286
+ In batch mode (`map_entities`), per-record errors are caught and returned as
287
+ `MappingResult(error=...)` rather than aborting the batch.
288
+
289
+ ---
290
+
291
+ ## Development
292
+
293
+ ```bash
294
+ git clone https://github.com/trentleslie/ddharmon
295
+ cd ddharmon
296
+ poetry install --with dev --extras all
297
+
298
+ make check # format → lint → type-check → test
299
+ make test # tests only
300
+ make coverage # HTML coverage report
301
+ ```
302
+
303
+ ---
304
+
305
+ ## License
306
+
307
+ MIT — see [LICENSE](LICENSE).
308
+
309
+ ---
310
+
311
+ ## Related
312
+
313
+ - **BioMapper2 API**: `https://biomapper.expertintheloop.io`
314
+ - **EITL platform**: `https://expertintheloop.io`
315
+ - **biovector-eval notebooks**: `https://github.com/trentleslie/biovector-eval`
316
+
@@ -0,0 +1,287 @@
1
+ # ddharmon
2
+
3
+ Python client for the **BioMapper2 API** — map biological entity names to
4
+ standardized knowledge-graph identifiers (CHEBI, HMDB, PubChem, RefMet, and more).
5
+
6
+ ```python
7
+ from ddharmon import map_entity
8
+
9
+ result = map_entity("L-Histidine")
10
+ print(result.primary_curie) # RM:0129894
11
+ print(result.confidence_tier) # high
12
+ print(result.ids_for("CHEBI")) # ['15971']
13
+ ```
14
+
15
+ ---
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ # Core (async HTTP client + Pydantic models)
21
+ pip install ddharmon
22
+
23
+ # With Metabolon preprocessing utilities (pandas, openpyxl)
24
+ pip install "ddharmon[metabolon]"
25
+
26
+ # With notebook progress bars (tqdm, nest-asyncio)
27
+ pip install "ddharmon[notebook]"
28
+
29
+ # Everything
30
+ pip install "ddharmon[all]"
31
+ ```
32
+
33
+ ---
34
+
35
+ ## Getting an API key
36
+
37
+ The BioMapper2 API requires an API key. To request access, email
38
+ [trent.leslie@phenomehealth.org](mailto:trent.leslie@phenomehealth.org).
39
+
40
+ Once you have a key, set it in your environment:
41
+ ```bash
42
+ export BIOMAPPER_API_KEY=your-key-here
43
+ ```
44
+
45
+ Or add it to a `.env` file in your project root:
46
+ ```
47
+ BIOMAPPER_API_KEY=your-key-here
48
+ ```
49
+
50
+ ddharmon will pick it up automatically from either location.
51
+
52
+ ---
53
+
54
+ ## Quick start
55
+
56
+ ### Single lookup (synchronous)
57
+
58
+ ```python
59
+ from ddharmon import map_entity
60
+
61
+ result = map_entity("L-Histidine")
62
+
63
+ print(result.resolved) # True
64
+ print(result.primary_curie) # RM:0129894
65
+ print(result.chosen_kg_id) # CHEBI:15971
66
+ print(result.confidence_score) # 2.489
67
+ print(result.confidence_tier) # high (≥2.0)
68
+ print(result.ids_for("CHEBI")) # ['15971']
69
+ print(result.ids_for("refmet_id")) # ['RM0129894']
70
+ ```
71
+
72
+ ### Batch mapping (synchronous)
73
+
74
+ ```python
75
+ from ddharmon import map_entities, summarize
76
+
77
+ records = [
78
+ {"name": "L-Histidine"},
79
+ {"name": "Glucose", "identifiers": {"HMDB": "HMDB00122"}},
80
+ {"name": "Sphinganine"},
81
+ ]
82
+
83
+ results = map_entities(records, progress=True) # tqdm bar with [notebook]
84
+ summary = summarize(results)
85
+
86
+ print(f"{summary.resolved}/{summary.total_queried} resolved")
87
+ print(f"Resolution rate: {summary.resolution_rate:.1%}")
88
+ print(summary.vocabulary_coverage)
89
+ ```
90
+
91
+ ### Async usage
92
+
93
+ ```python
94
+ import asyncio
95
+ from ddharmon import BioMapperClient
96
+
97
+ async def main() -> None:
98
+     async with BioMapperClient() as client:
99
+         # Verify connectivity
100
+         health = await client.health_check()
101
+         print(health) # {'status': 'healthy', ...}
102
+
103
+         # Single
104
+         result = await client.map_entity(
105
+             "L-Histidine",
106
+             identifiers={"HMDB": "HMDB00177"},
107
+         )
108
+
109
+         # Batch with rate limiting
110
+         results = await client.map_entities(
111
+             [{"name": "L-Histidine"}, {"name": "Glucose"}],
112
+             rate_limit_delay=0.3,
113
+             progress=True,
114
+         )
115
+
116
+ asyncio.run(main())
117
+ ```
118
+
119
+ ### Jupyter notebooks
120
+
121
+ Apply `nest_asyncio` before using sync helpers inside a running event loop:
122
+
123
+ ```python
124
+ import nest_asyncio
125
+ nest_asyncio.apply() # required in Jupyter
126
+
127
+ from ddharmon import map_entities
128
+ results = map_entities([{"name": "L-Histidine"}], progress=True)
129
+ ```
130
+
131
+ ---
132
+
133
+ ## Metabolon extras
134
+
135
+ The `ddharmon[metabolon]` extra ships helpers that replicate and generalize the
136
+ preprocessing from the BioVector-eval Metabolon tutorial notebook.
137
+
138
+ ```python
139
+ import pandas as pd
140
+ from ddharmon import map_entities, summarize
141
+ from ddharmon.extras.metabolon import (
142
+ build_mapping_queue,
143
+ clean_compound_name,
144
+ extract_hmdb_id,
145
+ )
146
+ from ddharmon.extras.metabolon.export import save_results, results_to_dataframe
147
+
148
+ # 1. Load your Metabolon features spreadsheet
149
+ df = pd.read_excel("Metabolon_unknown_combined_features_metadata.xlsx")
150
+
151
+ # 2. Build a deduplicated mapping queue
152
+ # - cleans compound names (strips quotes, _CE## suffixes)
153
+ # - extracts HMDB hints from ms1_compound_name
154
+ # - deduplicates by cleaned name, tracking all feature_ids
155
+ queue = build_mapping_queue(
156
+ df,
157
+ name_col="matched_name",
158
+ hint_col="ms1_compound_name",
159
+ limit=50, # set to None for full run
160
+ )
161
+
162
+ print(f"{len(queue)} unique names to map")
163
+ print(f" with HMDB hints: {sum(1 for r in queue if r.hmdb_hint)}")
164
+
165
+ # 3. Map (convert queue → API records first)
166
+ results = map_entities(
167
+ [r.as_api_record() for r in queue],
168
+ rate_limit_delay=0.3,
169
+ progress=True,
170
+ )
171
+
172
+ # 4. Summarize
173
+ summary = summarize(results)
174
+ print(f"Resolution rate: {summary.resolution_rate:.1%}")
175
+
176
+ # 5. Export
177
+ save_results(
178
+ results,
179
+ summary=summary,
180
+ json_path="output/mapping.json",
181
+ tsv_path="output/mapping.tsv",
182
+ )
183
+
184
+ # Or work directly in pandas
185
+ result_df = results_to_dataframe(results)
186
+ print(result_df[["query_name", "primary_curie", "confidence_tier"]].head())
187
+ ```
188
+
189
+ ### Preprocessing functions
190
+
191
+ ```python
192
+ from ddharmon.extras.metabolon import clean_compound_name, extract_hmdb_id
193
+
194
+ # Strip quotes and collision-energy suffixes
195
+ clean_compound_name('"1,3-Diphenylguanidine_CE45"') # '1,3-Diphenylguanidine'
196
+ clean_compound_name('"4,6-DIOXOHEPTANOIC ACID"') # '4,6-DIOXOHEPTANOIC ACID'
197
+ clean_compound_name('L-Histidine') # 'L-Histidine' (unchanged)
198
+
199
+ # Extract HMDB accessions from ms1_compound_name format
200
+ extract_hmdb_id('HMDB:HMDB03349-2257 L-Dihydroorotic acid') # 'HMDB03349'
201
+ extract_hmdb_id('HMDB00177') # 'HMDB00177'
202
+ extract_hmdb_id(None) # None
203
+ ```
204
+
205
+ ---
206
+
207
+ ## API reference
208
+
209
+ ### `MappingResult`
210
+
211
+ | Attribute | Type | Description |
212
+ |---|---|---|
213
+ | `query_name` | `str` | Name submitted to the API |
214
+ | `resolved` | `bool` | Whether any identifier was returned |
215
+ | `primary_curie` | `str \| None` | First CURIE in the response |
216
+ | `chosen_kg_id` | `str \| None` | Resolver-selected knowledge graph ID |
217
+ | `confidence_score` | `float \| None` | Highest score across annotators |
218
+ | `confidence_tier` | `str` | `"high"` (≥2.0) / `"medium"` (≥1.0 and <2.0) / `"low"` (<1.0) / `"unknown"` |
219
+ | `identifiers` | `dict[str, list[str]]` | Vocabulary → IDs, e.g. `{"CHEBI": ["15971"]}` |
220
+ | `hmdb_hint` | `str \| None` | HMDB hint passed in the request |
221
+ | `error` | `str \| None` | Error message if mapping failed |
222
+
223
+ ```python
224
+ result.ids_for("CHEBI") # ['15971']
225
+ result.ids_for("refmet_id") # ['RM0129894']
226
+ result.ids_for("PUBCHEM.COMPOUND") # []
227
+ ```
228
+
229
+ ### Confidence tiers
230
+
231
+ | Score | Tier | Recommended action |
232
+ |---|---|---|
233
+ | ≥ 2.0 | `high` | Accept without review |
234
+ | ≥ 1.0 and < 2.0 | `medium` | Quick sanity check |
235
+ | < 1.0 | `low` | Manual review recommended |
236
+ | `None` | `unknown` | No score returned (e.g. resolved via an HMDB hint) |
237
+
238
+ ### Error handling
239
+
240
+ ```python
241
+ from ddharmon import (
242
+ BioMapperError, # base class
243
+ BioMapperAuthError, # 401/403 — bad API key
244
+ BioMapperRateLimitError, # 429 — throttled
245
+ BioMapperServerError, # 5xx
246
+ BioMapperTimeoutError, # request timeout
247
+ BioMapperConfigError, # missing API key / bad config
248
+ )
249
+
250
+ try:
251
+     result = map_entity("Glucose")
252
+ except BioMapperRateLimitError as e:
253
+     print(f"Throttled. Retry after: {e.retry_after}s")
254
+ except BioMapperAuthError:
255
+     print("Check your BIOMAPPER_API_KEY")
256
+ ```
257
+
258
+ In batch mode (`map_entities`), per-record errors are caught and returned as
259
+ `MappingResult(error=...)` rather than aborting the batch.
260
+
261
+ ---
262
+
263
+ ## Development
264
+
265
+ ```bash
266
+ git clone https://github.com/trentleslie/ddharmon
267
+ cd ddharmon
268
+ poetry install --with dev --extras all
269
+
270
+ make check # format → lint → type-check → test
271
+ make test # tests only
272
+ make coverage # HTML coverage report
273
+ ```
274
+
275
+ ---
276
+
277
+ ## License
278
+
279
+ MIT — see [LICENSE](LICENSE).
280
+
281
+ ---
282
+
283
+ ## Related
284
+
285
+ - **BioMapper2 API**: `https://biomapper.expertintheloop.io`
286
+ - **EITL platform**: `https://expertintheloop.io`
287
+ - **biovector-eval notebooks**: `https://github.com/trentleslie/biovector-eval`
@@ -0,0 +1,92 @@
1
+ [tool.poetry]
2
+ name = "ddharmon"
3
+ version = "0.1.0"
4
+ description = "Python client for the BioMapper2 API — map biological entities to standardized knowledge graph identifiers"
5
+ authors = ["Trent Leslie <trent@phenomehealth.org>"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+ homepage = "https://github.com/trentleslie/ddharmon"
9
+ repository = "https://github.com/trentleslie/ddharmon"
10
+ documentation = "https://github.com/trentleslie/ddharmon#readme"
11
+ keywords = ["bioinformatics", "metabolomics", "knowledge-graph", "biomapper", "multi-omics"]
12
+ classifiers = [
13
+ "Development Status :: 3 - Alpha",
14
+ "Intended Audience :: Science/Research",
15
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Programming Language :: Python :: 3",
18
+ "Programming Language :: Python :: 3.11",
19
+ "Programming Language :: Python :: 3.12",
20
+ ]
21
+ packages = [{include = "ddharmon", from = "src"}]
22
+
23
+ [tool.poetry.dependencies]
24
+ python = "^3.11"
25
+ httpx = "^0.27"
26
+ pydantic = "^2.0"
27
+ python-dotenv = "^1.0"
28
+
29
+ [tool.poetry.extras]
30
+ metabolon = ["pandas", "openpyxl"]
31
+ notebook = ["tqdm", "nest-asyncio"]
32
+ all = ["pandas", "openpyxl", "tqdm", "nest-asyncio"]
33
+
34
+ [tool.poetry.group.metabolon.dependencies]
35
+ pandas = {version = "^2.0", optional = true}
36
+ openpyxl = {version = "^3.1", optional = true}
37
+
38
+ [tool.poetry.group.notebook.dependencies]
39
+ tqdm = {version = "^4.66", optional = true}
40
+ nest-asyncio = {version = "^1.6", optional = true}
41
+
42
+ [tool.poetry.group.dev.dependencies]
43
+ pytest = "^8.0"
44
+ pytest-asyncio = "^0.23"
45
+ pytest-cov = "^5.0"
46
+ respx = "^0.21"
47
+ ruff = "^0.4"
48
+ mypy = "^1.10"
49
+ pandas-stubs = "^2.0"
50
+ types-tqdm = "^4.66"
51
+
52
+ [build-system]
53
+ requires = ["poetry-core"]
54
+ build-backend = "poetry.core.masonry.api"
55
+
56
+ # ---------------------------------------------------------------------------
57
+ # Ruff
58
+ # ---------------------------------------------------------------------------
59
+ [tool.ruff]
60
+ target-version = "py311"
61
+ line-length = 100
62
+ src = ["src", "tests"]
63
+
64
+ [tool.ruff.lint]
65
+ select = ["E", "F", "I", "UP", "B", "SIM", "ANN"]
66
+ ignore = ["ANN101", "ANN102"]
67
+
68
+ [tool.ruff.lint.per-file-ignores]
69
+ "tests/**" = ["ANN"]
70
+
71
+ # ---------------------------------------------------------------------------
72
+ # Mypy
73
+ # ---------------------------------------------------------------------------
74
+ [tool.mypy]
75
+ python_version = "3.11"
76
+ strict = true
77
+ warn_return_any = true
78
+ warn_unused_configs = true
79
+ disallow_untyped_defs = true
80
+ disallow_any_generics = true
81
+
82
+ [[tool.mypy.overrides]]
83
+ module = ["nest_asyncio"]
84
+ ignore_missing_imports = true
85
+
86
+ # ---------------------------------------------------------------------------
87
+ # Pytest
88
+ # ---------------------------------------------------------------------------
89
+ [tool.pytest.ini_options]
90
+ asyncio_mode = "auto"
91
+ testpaths = ["tests"]
92
+ addopts = "--cov=ddharmon --cov-report=term-missing --cov-fail-under=80"