netrias_client 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ """Expose the Netrias client facade and package metadata."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from ._client import NetriasClient
6
+ from ._errors import DataModelStoreError
7
+ from ._models import CDE, DataModel, PermissibleValue
8
+
9
# Package release identifier; also advertised through ``__all__`` below.
__version__ = "0.1.0"

# Names re-exported as the package's public surface.
__all__ = [
    "NetriasClient",
    "DataModel",
    "CDE",
    "PermissibleValue",
    "DataModelStoreError",
    "__version__",
]
@@ -0,0 +1,288 @@
1
+ """Translate discovery results into manifest-friendly mappings.
2
+
3
+ 'why': bridge API recommendations to harmonization manifests while respecting confidence bounds
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ import logging
9
+ from collections.abc import Iterable, Mapping
10
+ from pathlib import Path
11
+ from typing import Final, cast
12
+
13
+ from ._models import MappingDiscoveryResult, MappingRecommendationOption, MappingSuggestion
14
+
15
+
16
+
17
def build_column_mapping_payload(
    result: MappingDiscoveryResult,
    threshold: float,
    logger: logging.Logger | None = None,
) -> dict[str, dict[str, dict[str, object]]]:
    """Build the ``column_mappings`` manifest payload from a discovery result.

    Args:
        result: discovery output to translate.
        threshold: minimum confidence an option needs in order to be selected.
        logger: destination for adapter diagnostics; the ``netrias_client``
            logger is used when none is supplied.

    Returns:
        A single-key dict, ``{"column_mappings": {...}}``, whose inner dict is
        keyed by source column name.
    """
    active = logger if logger else logging.getLogger("netrias_client")
    targets = strongest_targets(result, threshold=threshold, logger=active)
    entries = _column_entries(targets, active)
    return {"column_mappings": entries}
27
+
28
+
29
+ _COLUMN_METADATA: Final[dict[str, dict[str, object]]] = {
30
+ # "study_name": {"route": "api:passthrough", "targetField": "study_name"},
31
+ # "number_of_participants": {"route": "api:passthrough", "targetField": "number_of_participants"},
32
+ # "number_of_samples": {"route": "api:passthrough", "targetField": "number_of_samples"},
33
+ # "study_data_types": {
34
+ # "route": "api:passthrough",
35
+ # "targetField": "study_data_types",
36
+ # "cdeId": 12_571_096,
37
+ # "cde_id": 12_571_096,
38
+ # },
39
+ # "participant_id": {"route": "api:passthrough", "targetField": "participant_id"},
40
+ # "sample_id": {"route": "api:passthrough", "targetField": "sample_id"},
41
+ # "file_name": {"route": "api:passthrough", "targetField": "file_name"},
42
+ "primary_diagnosis": {
43
+ "route": "sagemaker:primary",
44
+ "targetField": "primary_diagnosis",
45
+ "cdeId": -200,
46
+ "cde_id": -200,
47
+ },
48
+ "therapeutic_agents": {
49
+ "route": "sagemaker:therapeutic_agents",
50
+ "targetField": "therapeutic_agents",
51
+ "cdeId": -203,
52
+ "cde_id": -203,
53
+ },
54
+ "morphology": {
55
+ "route": "sagemaker:morphology",
56
+ "targetField": "morphology",
57
+ "cdeId": -201,
58
+ "cde_id": -201,
59
+ },
60
+ # "tissue_or_organ_of_origin": {
61
+ # "route": "sagemaker:tissue_origin",
62
+ # "targetField": "tissue_or_organ_of_origin",
63
+ # "cdeId": -204,
64
+ # "cde_id": -204,
65
+ # },
66
+ # "site_of_resection_or_biopsy": {
67
+ # "route": "sagemaker:sample_anatomic_site",
68
+ # "targetField": "site_of_resection_or_biopsy",
69
+ # "cdeId": -202,
70
+ # "cde_id": -202,
71
+ # },
72
+ }
73
+
74
+
75
def strongest_targets(
    result: MappingDiscoveryResult,
    threshold: float,
    logger: logging.Logger,
) -> dict[str, str]:
    """Pick the best target at or above ``threshold`` for each source column.

    Parsed ``result.suggestions`` are used when present; otherwise the mapping
    is rebuilt from ``result.raw``. The outcome is logged at info level when
    something was selected and at warning level when nothing survived.
    """
    picks = (
        _from_suggestions(result.suggestions, threshold)
        if result.suggestions
        else _from_raw_payload(result.raw, threshold)
    )

    if not picks:
        logger.warning("adapter strongest targets empty after filtering")
        return picks

    logger.info("adapter strongest targets: %s", picks)
    return picks
92
+
93
+
94
def _column_entries(
    strongest: Mapping[str, str],
    logger: logging.Logger,
) -> dict[str, dict[str, object]]:
    """Turn selected source->target pairs into per-column manifest entries.

    Entries that end up without a ``cdeId`` are reported at info level. Every
    column configured in ``_COLUMN_METADATA`` is then added if it is missing.
    """
    entries: dict[str, dict[str, object]] = {
        source: _initial_entry(source, target) for source, target in strongest.items()
    }
    # Computed before the defaults are merged in, so only selected columns are reported.
    unresolved: dict[str, str] = {
        source: strongest[source] for source, entry in entries.items() if _needs_cde(entry)
    }

    _apply_metadata_defaults(entries)

    if unresolved:
        logger.info("adapter unresolved targets (no CDE id mapping): %s", unresolved)
    return entries
111
+
112
+
113
def _initial_entry(source: str, target: str) -> dict[str, object]:
    """Return the starting manifest entry for one source column.

    Configured metadata, looked up by ``source``, takes precedence and is
    returned as a copy, including its own ``targetField``. The discovered
    ``target`` is only used when no metadata exists for the column.
    """
    configured = _COLUMN_METADATA.get(source)
    return {"targetField": target} if configured is None else dict(configured)
119
+
120
+
121
+ def _needs_cde(entry: Mapping[str, object]) -> bool:
122
+ return "cdeId" not in entry
123
+
124
+
125
def _apply_metadata_defaults(entries: dict[str, dict[str, object]]) -> None:
    """Add a copy of each configured column's metadata that ``entries`` lacks.

    Existing entries are left untouched; ``entries`` is mutated in place.
    """
    for column, defaults in _COLUMN_METADATA.items():
        entries.setdefault(column, dict(defaults))
129
+
130
+
131
def _from_suggestions(
    suggestions: Iterable[MappingSuggestion], threshold: float
) -> dict[str, str]:
    """Map each suggestion's source column to its best qualifying target.

    Suggestions with no option at or above ``threshold``, or whose best option
    has no target, are left out.
    """
    picks: dict[str, str] = {}
    for suggestion in suggestions:
        best = _top_option(suggestion.options, threshold)
        if best is not None and best.target is not None:
            picks[suggestion.source_column] = best.target
    return picks
141
+
142
+
143
def _from_raw_payload(payload: Mapping[str, object], threshold: float) -> dict[str, str]:
    """Map each raw payload column to its best qualifying target.

    Each value is coerced into options first; columns with no option at or
    above ``threshold``, or whose best option has no target, are left out.
    """
    picks: dict[str, str] = {}
    for column, value in payload.items():
        best = _top_option(_coerce_options(value), threshold)
        if best is not None and best.target is not None:
            picks[column] = best.target
    return picks
152
+
153
+
154
+ def _coerce_options(value: object) -> tuple[MappingRecommendationOption, ...]:
155
+ if not isinstance(value, list):
156
+ return ()
157
+ return tuple(_option_iterator(cast(list[object], value)))
158
+
159
+
160
+ def _option_iterator(items: list[object]) -> Iterable[MappingRecommendationOption]:
161
+ for item in items:
162
+ if not isinstance(item, Mapping):
163
+ continue
164
+ option = _option_from_mapping(cast(Mapping[str, object], item))
165
+ if option is not None:
166
+ yield option
167
+
168
+
169
def _option_from_mapping(item: Mapping[str, object]) -> MappingRecommendationOption | None:
    """Build an option from one raw recommendation mapping.

    Returns ``None`` unless ``item["target"]`` is a string. The option's
    confidence is ``item["similarity"]`` as a float when that value is an
    ``int`` or ``float``, and ``None`` otherwise. ``item`` is kept as ``raw``.
    """
    target = item.get("target")
    if not isinstance(target, str):
        return None
    similarity = item.get("similarity")
    confidence = float(similarity) if isinstance(similarity, (float, int)) else None
    return MappingRecommendationOption(target=target, confidence=confidence, raw=item)
178
+
179
+
180
+ def _top_option(
181
+ options: Iterable[MappingRecommendationOption], threshold: float
182
+ ) -> MappingRecommendationOption | None:
183
+ eligible = [opt for opt in options if _meets_threshold(opt, threshold)]
184
+ if not eligible:
185
+ return None
186
+ return max(eligible, key=lambda opt: opt.confidence or float("-inf"))
187
+
188
+
189
+ def _meets_threshold(option: MappingRecommendationOption, threshold: float) -> bool:
190
+ score = option.confidence
191
+ if score is None:
192
+ return False
193
+ return score >= threshold
194
+
195
def normalize_manifest_mapping(
    manifest: Path | Mapping[str, object] | None,
) -> dict[str, int]:
    """Reduce a manifest to a flat ``{column name: CDE id}`` dict.

    Args:
        manifest: a JSON file path, an already-loaded mapping, or ``None``.

    Returns:
        One entry per column whose name and CDE id could both be resolved;
        an empty dict when ``manifest`` is ``None``.
    """
    normalized: dict[str, int] = {}
    if manifest is None:
        return normalized

    columns = _mapping_dict(_load_manifest_raw(manifest))
    for field, value in columns.items():
        _apply_cde_entry(normalized, field, value)
    return normalized
208
+
209
+
210
+ def _load_manifest_raw(manifest: Path | Mapping[str, object]) -> Mapping[str, object]:
211
+ if isinstance(manifest, Path):
212
+ content = manifest.read_text(encoding="utf-8")
213
+ try:
214
+ return cast(Mapping[str, object], json.loads(content))
215
+ except json.JSONDecodeError as exc:
216
+ raise ValueError(f"manifest must be valid JSON: {exc}") from exc
217
+ return manifest
218
+
219
+
220
def _mapping_dict(raw: Mapping[str, object]) -> dict[str, object]:
    """Return the column table held by a manifest as a plain dict.

    When ``raw`` has a mapping under ``column_mappings`` that nested mapping is
    used; otherwise ``raw`` itself is treated as the table. A non-mapping
    ``raw`` yields an empty dict.
    """
    top_level = _dict_if_str_mapping(raw)
    if top_level is None:
        return {}
    nested = _dict_if_str_mapping(top_level.get("column_mappings"))
    if nested is None:
        return top_level
    return nested
226
+
227
+
228
+ def _dict_if_str_mapping(value: object) -> dict[str, object] | None:
229
+ if isinstance(value, Mapping):
230
+ typed = cast(Mapping[str, object], value)
231
+ return dict(typed)
232
+ return None
233
+
234
+
235
def _apply_cde_entry(destination: dict[str, int], field: object, value: object) -> None:
    """Store ``field -> CDE id`` in ``destination`` when both parts resolve.

    Nothing is written if the field name cannot be cleaned or the value cannot
    be coerced to an integer id.
    """
    name = _clean_field(field)
    cde_id = _coerce_cde_id(value)
    if name is not None and cde_id is not None:
        destination[name] = cde_id
241
+
242
+
243
+ def _clean_field(field: object) -> str | None:
244
+ if not isinstance(field, str):
245
+ return None
246
+ name = field.strip()
247
+ return name or None
248
+
249
+
250
def _coerce_cde_id(value: object) -> int | None:
    """Resolve a manifest entry to an integer CDE id, or ``None`` if it has none."""
    candidate = _cde_candidate(value)
    return None if candidate is None else _int_from_candidate(candidate)
255
+
256
+
257
+ def _cde_candidate(value: object) -> object | None:
258
+ mapping = _dict_if_str_mapping(value)
259
+ if mapping is not None:
260
+ return mapping.get("cdeId") or mapping.get("cde_id")
261
+ return value
262
+
263
+
264
+ def _int_from_candidate(candidate: object) -> int | None:
265
+ if isinstance(candidate, bool):
266
+ return int(candidate)
267
+ if isinstance(candidate, (int, float)):
268
+ return _int_from_number(candidate)
269
+ if isinstance(candidate, str):
270
+ return _int_from_string(candidate)
271
+ return None
272
+
273
+
274
+ def _int_from_number(value: int | float) -> int | None:
275
+ try:
276
+ return int(value)
277
+ except (TypeError, ValueError):
278
+ return None
279
+
280
+
281
+ def _int_from_string(value: str) -> int | None:
282
+ stripped = value.strip()
283
+ if not stripped:
284
+ return None
285
+ try:
286
+ return int(stripped)
287
+ except ValueError:
288
+ return None