netrias_client 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- netrias_client/__init__.py +18 -0
- netrias_client/_adapter.py +288 -0
- netrias_client/_client.py +559 -0
- netrias_client/_config.py +101 -0
- netrias_client/_core.py +560 -0
- netrias_client/_data_model_store.py +366 -0
- netrias_client/_discovery.py +525 -0
- netrias_client/_errors.py +37 -0
- netrias_client/_gateway_bypass.py +217 -0
- netrias_client/_http.py +234 -0
- netrias_client/_io.py +28 -0
- netrias_client/_logging.py +46 -0
- netrias_client/_models.py +115 -0
- netrias_client/_validators.py +192 -0
- netrias_client/scripts.py +313 -0
- netrias_client-0.1.0.dist-info/METADATA +178 -0
- netrias_client-0.1.0.dist-info/RECORD +20 -0
- netrias_client-0.1.0.dist-info/WHEEL +4 -0
- netrias_client-0.1.0.dist-info/entry_points.txt +5 -0
- netrias_client-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Expose the Netrias client facade and package metadata."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from ._client import NetriasClient
|
|
6
|
+
from ._errors import DataModelStoreError
|
|
7
|
+
from ._models import CDE, DataModel, PermissibleValue
|
|
8
|
+
|
|
9
|
+
# Names exported by `from netrias_client import *`: the client facade, the
# data-model types, the store error, and the version string.
__all__ = [
    "NetriasClient",
    "DataModel",
    "CDE",
    "PermissibleValue",
    "DataModelStoreError",
    "__version__",
]

# Package version string, listed in `__all__` above.
__version__ = "0.1.0"
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
"""Translate discovery results into manifest-friendly mappings.
|
|
2
|
+
|
|
3
|
+
'why': bridge API recommendations to harmonization manifests while respecting confidence bounds
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
from collections.abc import Iterable, Mapping
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Final, cast
|
|
12
|
+
|
|
13
|
+
from ._models import MappingDiscoveryResult, MappingRecommendationOption, MappingSuggestion
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def build_column_mapping_payload(
    result: MappingDiscoveryResult,
    threshold: float,
    logger: logging.Logger | None = None,
) -> dict[str, dict[str, dict[str, object]]]:
    """Build the ``{"column_mappings": ...}`` manifest from a discovery result.

    Args:
        result: Discovery output to translate.
        threshold: Minimum confidence an option needs to be selected.
        logger: Logger for adapter diagnostics; when omitted, the
            ``"netrias_client"`` logger is used.

    Returns:
        A dict with a single ``"column_mappings"`` key mapping each column
        name to its manifest entry.
    """

    log = logger if logger else logging.getLogger("netrias_client")
    targets = strongest_targets(result, threshold=threshold, logger=log)
    column_mappings = _column_entries(targets, log)
    return {"column_mappings": column_mappings}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Static manifest metadata keyed by source column name.
# `_initial_entry` copies the matching entry instead of using the discovered
# target, and `_apply_metadata_defaults` adds every entry that discovery did not
# produce. Each active entry stores its id under both "cdeId" and "cde_id".
# NOTE(review): the negative ids look like internal placeholders rather than
# registry CDE ids — confirm with the harmonization service.
# The commented-out entries below are disabled and are not emitted.
_COLUMN_METADATA: Final[dict[str, dict[str, object]]] = {
    # "study_name": {"route": "api:passthrough", "targetField": "study_name"},
    # "number_of_participants": {"route": "api:passthrough", "targetField": "number_of_participants"},
    # "number_of_samples": {"route": "api:passthrough", "targetField": "number_of_samples"},
    # "study_data_types": {
    #     "route": "api:passthrough",
    #     "targetField": "study_data_types",
    #     "cdeId": 12_571_096,
    #     "cde_id": 12_571_096,
    # },
    # "participant_id": {"route": "api:passthrough", "targetField": "participant_id"},
    # "sample_id": {"route": "api:passthrough", "targetField": "sample_id"},
    # "file_name": {"route": "api:passthrough", "targetField": "file_name"},
    "primary_diagnosis": {
        "route": "sagemaker:primary",
        "targetField": "primary_diagnosis",
        "cdeId": -200,
        "cde_id": -200,
    },
    "therapeutic_agents": {
        "route": "sagemaker:therapeutic_agents",
        "targetField": "therapeutic_agents",
        "cdeId": -203,
        "cde_id": -203,
    },
    "morphology": {
        "route": "sagemaker:morphology",
        "targetField": "morphology",
        "cdeId": -201,
        "cde_id": -201,
    },
    # "tissue_or_organ_of_origin": {
    #     "route": "sagemaker:tissue_origin",
    #     "targetField": "tissue_or_organ_of_origin",
    #     "cdeId": -204,
    #     "cde_id": -204,
    # },
    # "site_of_resection_or_biopsy": {
    #     "route": "sagemaker:sample_anatomic_site",
    #     "targetField": "site_of_resection_or_biopsy",
    #     "cdeId": -202,
    #     "cde_id": -202,
    # },
}
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def strongest_targets(
    result: MappingDiscoveryResult,
    threshold: float,
    logger: logging.Logger,
) -> dict[str, str]:
    """Pick the best target for each column that clears ``threshold``.

    Structured ``result.suggestions`` are used when present; otherwise the
    targets are derived from the raw payload in ``result.raw``.

    Args:
        result: Discovery output to inspect.
        threshold: Minimum confidence an option needs to be selected.
        logger: Receives an info record with the selection, or a warning when
            nothing survives filtering.

    Returns:
        Mapping of column name to the selected target name.
    """

    suggestions = result.suggestions
    picked = (
        _from_suggestions(suggestions, threshold)
        if suggestions
        else _from_raw_payload(result.raw, threshold)
    )

    if not picked:
        logger.warning("adapter strongest targets empty after filtering")
        return picked

    logger.info("adapter strongest targets: %s", picked)
    return picked
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _column_entries(
    strongest: Mapping[str, str],
    logger: logging.Logger,
) -> dict[str, dict[str, object]]:
    """Build manifest entries for the selected columns plus configured defaults.

    Columns whose entry has no ``"cdeId"`` are reported through ``logger`` at
    info level but are still included in the result.
    """
    entries: dict[str, dict[str, object]] = {
        column: _initial_entry(column, target) for column, target in strongest.items()
    }
    # Collected before defaults are merged, so only discovered columns are reported.
    unresolved: dict[str, str] = {
        column: strongest[column] for column, entry in entries.items() if _needs_cde(entry)
    }

    _apply_metadata_defaults(entries)

    if unresolved:
        logger.info("adapter unresolved targets (no CDE id mapping): %s", unresolved)
    return entries
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _initial_entry(source: str, target: str) -> dict[str, object]:
    """Return the starting manifest entry for ``source``.

    A column listed in ``_COLUMN_METADATA`` gets a copy of its configured entry,
    and the discovered ``target`` is ignored so the configured ``targetField``
    wins. Any other column gets ``{"targetField": target}``.
    """
    configured = _COLUMN_METADATA.get(source)
    if configured is not None:
        # Shallow copy so callers never mutate the shared module-level constant.
        return dict(configured)
    return {"targetField": target}
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _needs_cde(entry: Mapping[str, object]) -> bool:
|
|
122
|
+
return "cdeId" not in entry
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _apply_metadata_defaults(entries: dict[str, dict[str, object]]) -> None:
    """Add, in place, a copy of every ``_COLUMN_METADATA`` entry missing from ``entries``.

    Existing entries are left untouched.
    """
    for column in _COLUMN_METADATA:
        if column in entries:
            continue
        entries[column] = dict(_COLUMN_METADATA[column])
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _from_suggestions(
    suggestions: Iterable[MappingSuggestion], threshold: float
) -> dict[str, str]:
    """Map each suggestion's source column to its best target above ``threshold``.

    Suggestions with no eligible option, or whose best option has no target,
    are left out.
    """
    picks: dict[str, str] = {}
    for item in suggestions:
        best = _top_option(item.options, threshold)
        if best is not None and best.target is not None:
            picks[item.source_column] = best.target
    return picks
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _from_raw_payload(payload: Mapping[str, object], threshold: float) -> dict[str, str]:
    """Map each payload key to its best target above ``threshold``.

    Each value is parsed with ``_coerce_options``; keys with no eligible
    option, or whose best option has no target, are left out.
    """
    picks: dict[str, str] = {}
    for column, candidates in payload.items():
        best = _top_option(_coerce_options(candidates), threshold)
        if best is not None and best.target is not None:
            picks[column] = best.target
    return picks
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _coerce_options(value: object) -> tuple[MappingRecommendationOption, ...]:
    """Parse a raw payload value into options; anything that is not a list yields ``()``."""
    if isinstance(value, list):
        raw_items = cast(list[object], value)
        return tuple(_option_iterator(raw_items))
    return ()
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _option_iterator(items: list[object]) -> Iterable[MappingRecommendationOption]:
    """Yield one option per mapping item that ``_option_from_mapping`` accepts.

    Items that are not mappings, and mappings that cannot be parsed, are skipped.
    """
    mapping_items = (entry for entry in items if isinstance(entry, Mapping))
    for entry in mapping_items:
        parsed = _option_from_mapping(cast(Mapping[str, object], entry))
        if parsed is None:
            continue
        yield parsed
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _option_from_mapping(item: Mapping[str, object]) -> MappingRecommendationOption | None:
    """Build an option from a raw mapping, or return None when ``"target"`` is not a string.

    ``"similarity"`` becomes the option's confidence when it is an int or float
    (bool included, since bool is an int subclass); otherwise confidence is None.
    The original mapping is kept on the option as ``raw``.
    """
    target = item.get("target")
    if isinstance(target, str):
        similarity = item.get("similarity")
        confidence: float | None = (
            float(similarity) if isinstance(similarity, (float, int)) else None
        )
        return MappingRecommendationOption(target=target, confidence=confidence, raw=item)
    return None
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _top_option(
|
|
181
|
+
options: Iterable[MappingRecommendationOption], threshold: float
|
|
182
|
+
) -> MappingRecommendationOption | None:
|
|
183
|
+
eligible = [opt for opt in options if _meets_threshold(opt, threshold)]
|
|
184
|
+
if not eligible:
|
|
185
|
+
return None
|
|
186
|
+
return max(eligible, key=lambda opt: opt.confidence or float("-inf"))
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _meets_threshold(option: MappingRecommendationOption, threshold: float) -> bool:
|
|
190
|
+
score = option.confidence
|
|
191
|
+
if score is None:
|
|
192
|
+
return False
|
|
193
|
+
return score >= threshold
|
|
194
|
+
|
|
195
|
+
def normalize_manifest_mapping(
    manifest: Path | Mapping[str, object] | None,
) -> dict[str, int]:
    """Reduce a manifest to a flat ``{column name: CDE id}`` dict.

    Args:
        manifest: A path to a JSON manifest file, an already-loaded mapping,
            or None.

    Returns:
        Column names mapped to integer CDE ids. Entries whose name or id cannot
        be normalized are dropped; None yields an empty dict.

    Raises:
        ValueError: If ``manifest`` is a path whose content is not valid JSON.
    """

    normalized: dict[str, int] = {}
    if manifest is None:
        return normalized
    entries = _mapping_dict(_load_manifest_raw(manifest))
    for field, value in entries.items():
        _apply_cde_entry(normalized, field, value)
    return normalized
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _load_manifest_raw(manifest: Path | Mapping[str, object]) -> Mapping[str, object]:
|
|
211
|
+
if isinstance(manifest, Path):
|
|
212
|
+
content = manifest.read_text(encoding="utf-8")
|
|
213
|
+
try:
|
|
214
|
+
return cast(Mapping[str, object], json.loads(content))
|
|
215
|
+
except json.JSONDecodeError as exc:
|
|
216
|
+
raise ValueError(f"manifest must be valid JSON: {exc}") from exc
|
|
217
|
+
return manifest
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _mapping_dict(raw: Mapping[str, object]) -> dict[str, object]:
    """Return the dict that holds the per-column entries.

    When ``raw`` has a mapping under ``"column_mappings"``, that nested mapping
    is used; otherwise ``raw`` itself is treated as the column mapping. A
    ``raw`` that is not a mapping yields an empty dict.
    """
    top_level = _dict_if_str_mapping(raw)
    if top_level is None:
        return {}
    nested = _dict_if_str_mapping(top_level.get("column_mappings"))
    if nested is None:
        return top_level
    return nested
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def _dict_if_str_mapping(value: object) -> dict[str, object] | None:
|
|
229
|
+
if isinstance(value, Mapping):
|
|
230
|
+
typed = cast(Mapping[str, object], value)
|
|
231
|
+
return dict(typed)
|
|
232
|
+
return None
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _apply_cde_entry(destination: dict[str, int], field: object, value: object) -> None:
    """Store ``field -> CDE id`` in ``destination`` when both can be normalized.

    Nothing is written when the field name is unusable or no integer id can be
    derived from ``value``.
    """
    column = _clean_field(field)
    cde_id = _coerce_cde_id(value)
    if column is not None and cde_id is not None:
        destination[column] = cde_id
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _clean_field(field: object) -> str | None:
|
|
244
|
+
if not isinstance(field, str):
|
|
245
|
+
return None
|
|
246
|
+
name = field.strip()
|
|
247
|
+
return name or None
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def _coerce_cde_id(value: object) -> int | None:
    """Extract an integer CDE id from a manifest value, or None when there is none."""
    candidate = _cde_candidate(value)
    return None if candidate is None else _int_from_candidate(candidate)
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def _cde_candidate(value: object) -> object | None:
    """Return the raw id candidate carried by a manifest value.

    For a mapping, ``"cdeId"`` is preferred and ``"cde_id"`` is the fallback.
    Any other value is itself the candidate.
    """
    entry = _dict_if_str_mapping(value)
    if entry is None:
        return value
    camel_case_id = entry.get("cdeId")
    # Truthiness test: a falsy "cdeId" (None, 0, "") defers to "cde_id".
    if camel_case_id:
        return camel_case_id
    return entry.get("cde_id")
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def _int_from_candidate(candidate: object) -> int | None:
    """Convert a bool, number, or string candidate to an int; other types give None."""
    if isinstance(candidate, str):
        return _int_from_string(candidate)
    # bool is handled on its own: True -> 1, False -> 0.
    if isinstance(candidate, bool):
        return int(candidate)
    if isinstance(candidate, (int, float)):
        return _int_from_number(candidate)
    return None
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _int_from_number(value: int | float) -> int | None:
|
|
275
|
+
try:
|
|
276
|
+
return int(value)
|
|
277
|
+
except (TypeError, ValueError):
|
|
278
|
+
return None
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def _int_from_string(value: str) -> int | None:
|
|
282
|
+
stripped = value.strip()
|
|
283
|
+
if not stripped:
|
|
284
|
+
return None
|
|
285
|
+
try:
|
|
286
|
+
return int(stripped)
|
|
287
|
+
except ValueError:
|
|
288
|
+
return None
|