netrias_client 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- netrias_client/__init__.py +18 -0
- netrias_client/_adapter.py +288 -0
- netrias_client/_client.py +559 -0
- netrias_client/_config.py +101 -0
- netrias_client/_core.py +560 -0
- netrias_client/_data_model_store.py +366 -0
- netrias_client/_discovery.py +525 -0
- netrias_client/_errors.py +37 -0
- netrias_client/_gateway_bypass.py +217 -0
- netrias_client/_http.py +234 -0
- netrias_client/_io.py +28 -0
- netrias_client/_logging.py +46 -0
- netrias_client/_models.py +115 -0
- netrias_client/_validators.py +192 -0
- netrias_client/scripts.py +313 -0
- netrias_client-0.1.0.dist-info/METADATA +178 -0
- netrias_client-0.1.0.dist-info/RECORD +20 -0
- netrias_client-0.1.0.dist-info/WHEEL +4 -0
- netrias_client-0.1.0.dist-info/entry_points.txt +5 -0
- netrias_client-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""Validate inputs for harmonization.
|
|
2
|
+
|
|
3
|
+
'why': fail fast with clear, actionable messages prior to network calls
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from collections.abc import Mapping, Sequence
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from ._errors import FileValidationError, MappingValidationError, OutputLocationError
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# OBVIOUS HARD-CODED SIZE LIMIT: 250 MB maximum CSV size prior to upload
|
|
15
|
+
HARD_MAX_CSV_BYTES = 250 * 1024 * 1024
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def validate_source_path(path: Path) -> Path:
    """Vet the source CSV before any network work happens.

    The file must exist, be a regular file, carry a .csv extension, and
    stay under the hard-coded byte ceiling. Returns the same path so the
    call can be chained.
    """

    # Existence first so the later checks operate on a real path.
    _require_exists(path, "source CSV not found")
    _require_is_file(path, "source path is not a file")
    _require_suffix(path, ".csv", "unsupported file extension for source CSV")
    _require_not_too_large(path)
    return path
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def validate_manifest_path(path: Path) -> Path:
    """Vet the manifest JSON: it must exist as a regular .json file.

    Returns the same path so the call can be chained.
    """

    _require_exists(path, "manifest JSON not found")
    _require_is_file(path, "manifest path is not a file")
    _require_suffix(path, ".json", "manifest must be a .json file")
    return path
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def validate_output_path(path: Path | None, source_name: str, allow_versioning: bool = False) -> Path:
    """Resolve and vet the harmonized-output destination.

    When `path` is None or an existing directory, the default file name
    `<source_name>.harmonized.csv` is used (under CWD when None). Parent
    directories are created on demand and must be writable. With
    `allow_versioning`, the first free `*.vN.*` variant is chosen; without
    it, an already-existing file is rejected.
    """

    target = _resolve_output_candidate(path, source_name)
    _ensure_parent(target)
    _require_parent_writable(target)
    if not allow_versioning:
        _require_not_exists(target)
        return target
    return _next_available_path(target)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def validate_target_schema(schema: str) -> str:
    """Return the trimmed schema identifier, rejecting blank/empty input."""

    trimmed = schema.strip() if schema else ""
    if trimmed:
        return trimmed
    raise MappingValidationError("target_schema must be a non-empty string")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def validate_target_version(version: str) -> str:
    """Return the trimmed version identifier, rejecting blank/empty input."""

    trimmed = version.strip() if version else ""
    if trimmed:
        return trimmed
    raise MappingValidationError("target_version must be a non-empty string")
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def validate_top_k(top_k: int | None) -> int | None:
|
|
72
|
+
"""Ensure top_k is a positive integer when provided."""
|
|
73
|
+
|
|
74
|
+
if top_k is None:
|
|
75
|
+
return None
|
|
76
|
+
if top_k < 1:
|
|
77
|
+
raise MappingValidationError("top_k must be a positive integer")
|
|
78
|
+
return top_k
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def validate_column_samples(columns: Mapping[str, Sequence[object]]) -> dict[str, list[str]]:
    """Return `{name: samples}` with trimmed names and stringified samples.

    Raises `MappingValidationError` when the mapping is empty, a column
    name is invalid, or a column carries no usable samples.
    """

    if not columns:
        raise MappingValidationError("column data must include at least one column")
    # Key is evaluated before value (3.8+), so `name` is bound for the value expr.
    return {
        (name := _normalized_column_name(raw)): _normalized_samples(name, values)
        for raw, values in columns.items()
    }
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _require_exists(path: Path, message: str) -> None:
|
|
95
|
+
if not path.exists():
|
|
96
|
+
raise FileValidationError(f"{message}: {path}")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _require_is_file(path: Path, message: str) -> None:
|
|
100
|
+
if not path.is_file():
|
|
101
|
+
raise FileValidationError(f"{message}: {path}")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _require_suffix(path: Path, suffix: str, message: str) -> None:
|
|
105
|
+
if path.suffix.lower() != suffix:
|
|
106
|
+
raise FileValidationError(f"{message}: {path.suffix}")
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _require_not_too_large(path: Path) -> None:
    # Stat can fail (race, permissions); surface that as a validation error.
    try:
        byte_count = os.path.getsize(path)
    except OSError as exc:
        raise FileValidationError(f"unable to stat source CSV: {exc}") from exc
    if byte_count <= HARD_MAX_CSV_BYTES:
        return
    raise FileValidationError(
        f"source CSV exceeds hard-coded limit of {HARD_MAX_CSV_BYTES // (1024 * 1024)} MB (got {byte_count} bytes)"
    )
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _resolve_output_candidate(path: Path | None, source_name: str) -> Path:
|
|
121
|
+
if path is None:
|
|
122
|
+
return Path.cwd() / f"{source_name}.harmonized.csv"
|
|
123
|
+
if path.exists() and path.is_dir():
|
|
124
|
+
return path / f"{source_name}.harmonized.csv"
|
|
125
|
+
return path
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _ensure_parent(candidate: Path) -> None:
|
|
129
|
+
parent = candidate.parent
|
|
130
|
+
if not parent.exists():
|
|
131
|
+
try:
|
|
132
|
+
parent.mkdir(parents=True, exist_ok=True)
|
|
133
|
+
except OSError as exc:
|
|
134
|
+
raise OutputLocationError(f"unable to create output directory {parent}: {exc}") from exc
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _require_parent_writable(candidate: Path) -> None:
|
|
138
|
+
parent = candidate.parent
|
|
139
|
+
if parent.exists() and not os.access(parent, os.W_OK):
|
|
140
|
+
raise OutputLocationError(f"output directory not writable: {parent}")
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _require_not_exists(candidate: Path) -> None:
|
|
144
|
+
if candidate.exists():
|
|
145
|
+
raise OutputLocationError(f"refusing to overwrite existing file: {candidate}")
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _next_available_path(candidate: Path) -> Path:
|
|
149
|
+
if not candidate.exists():
|
|
150
|
+
return candidate
|
|
151
|
+
stem = candidate.stem
|
|
152
|
+
suffix = candidate.suffix
|
|
153
|
+
parent = candidate.parent
|
|
154
|
+
index = 1
|
|
155
|
+
while index < 1000:
|
|
156
|
+
versioned = parent / f"{stem}.v{index}{suffix}"
|
|
157
|
+
if not versioned.exists():
|
|
158
|
+
return versioned
|
|
159
|
+
index += 1
|
|
160
|
+
raise OutputLocationError(
|
|
161
|
+
f"unable to determine unique output path after {index - 1} attempts for {candidate}"
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _normalized_column_name(raw_name: object) -> str:
|
|
166
|
+
if not isinstance(raw_name, str):
|
|
167
|
+
raise MappingValidationError("column names must be strings")
|
|
168
|
+
name = raw_name.strip()
|
|
169
|
+
if not name:
|
|
170
|
+
raise MappingValidationError("column names must be non-empty strings")
|
|
171
|
+
return name
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _normalized_samples(column_name: str, values: Sequence[object] | None) -> list[str]:
    # Coerce every sample to trimmed text and drop blanks; at least one
    # usable sample must remain or discovery would have nothing to match on.
    sequence = _require_sequence(column_name, values)
    cleaned: list[str] = []
    for value in sequence:
        sample = _coerced_sample(value)
        if sample:
            cleaned.append(sample)
    if not cleaned:
        raise MappingValidationError(f"column '{column_name}' must include at least one non-empty sample value")
    return cleaned
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _require_sequence(column_name: str, values: Sequence[object] | None) -> Sequence[object]:
|
|
183
|
+
if values is None or isinstance(values, (str, bytes)):
|
|
184
|
+
raise MappingValidationError(f"column '{column_name}' values must be a sequence of samples")
|
|
185
|
+
return values
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _coerced_sample(value: object) -> str | None:
|
|
189
|
+
if value is None:
|
|
190
|
+
return None
|
|
191
|
+
text = str(value).strip()
|
|
192
|
+
return text or None
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
"""Coordinate project command entry points.
|
|
2
|
+
|
|
3
|
+
'why': centralize developer workflows for `uv run` execution
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import argparse
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
import shutil
|
|
12
|
+
import subprocess
|
|
13
|
+
import sys
|
|
14
|
+
import tempfile
|
|
15
|
+
from collections.abc import Mapping, Sequence
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Final, cast
|
|
19
|
+
|
|
20
|
+
_LOGGER = logging.getLogger("netrias_client.scripts")
|
|
21
|
+
_COMMANDS: Final[tuple[tuple[str, ...], ...]] = (
|
|
22
|
+
("pytest",),
|
|
23
|
+
("ruff", "check", "."),
|
|
24
|
+
("basedpyright", "."),
|
|
25
|
+
)
|
|
26
|
+
_REPO_ROOT: Final[Path] = Path(__file__).resolve().parents[2]
|
|
27
|
+
_PYPROJECT_PATH: Final[Path] = _REPO_ROOT / "pyproject.toml"
|
|
28
|
+
_PACKAGE_INIT_PATH: Final[Path] = Path(__file__).resolve().parent / "__init__.py"
|
|
29
|
+
_DIST_PATH: Final[Path] = _REPO_ROOT / "dist"
|
|
30
|
+
_VERSION_PATTERN: Final[re.Pattern[str]] = re.compile(r'^version\s*=\s*"(?P<value>[^"]+)"$', re.MULTILINE)
|
|
31
|
+
_INIT_VERSION_PATTERN: Final[re.Pattern[str]] = re.compile(r'^__version__\s*=\s*"(?P<value>[^"]+)"$', re.MULTILINE)
|
|
32
|
+
_REPOSITORY_CONFIG: Final[dict[str, tuple[str, str | None]]] = {
|
|
33
|
+
"testpypi": ("TEST_PYPI_TOKEN", "https://test.pypi.org/legacy/"),
|
|
34
|
+
"pypi": ("PYPI_TOKEN", None),
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(frozen=True)
class ReleaseOptions:
    """Capture release CLI arguments as structured options.

    'why': keep parsing separate from orchestration logic
    """

    # Explicit version to publish (mutually exclusive with `bump`), or None.
    version: str | None
    # Bump type applied when no explicit version is given: "patch", "minor", or "major".
    bump: str
    # Target index name ("testpypi" / "pypi"), or None to build without publishing.
    repository: str | None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def check() -> None:
    """Run the combined test and lint pipeline invoked by `uv run check`.

    'why': provide a single entry point that exits early on the first failure
    """

    _ensure_logging()
    # Each step aborts the whole pipeline on its first non-zero exit.
    for step in _COMMANDS:
        _run_command_or_raise(step)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def live_check() -> None:
    """Execute the live service smoke test harness.

    'why': exercise production endpoints without duplicating CLI plumbing
    """

    harness = ("python", "-m", "netrias_client.live_test.test")
    _run_command_or_raise(harness)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def release(argv: Sequence[str] | None = None) -> None:
    """Run the release pipeline: bump version, validate, build, publish.

    'why': streamline TestPyPI/PyPI releases from a single script
    """

    opts = _parse_release_args(argv)
    _ensure_logging()
    new_version = _determine_target_version(opts)
    _update_versions(new_version)
    _LOGGER.info("version synchronized → %s", new_version)
    # Full test/lint pipeline must pass before any artifact is produced.
    check()
    built = _build_distributions()
    _verify_artifacts(built)
    if opts.repository:
        _publish_artifacts(opts.repository)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _ensure_logging() -> None:
    """Provision a minimalist logging configuration for script execution."""

    # Only configure the root once; repeated calls are no-ops when a handler exists.
    already_configured = bool(_LOGGER.handlers)
    if not already_configured:
        logging.basicConfig(level=logging.INFO, format="%(message)s")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _run_command(command: Sequence[str], *, env: Mapping[str, str] | None = None, display_command: Sequence[str] | None = None) -> int:
    """Run `command` and return its exit status without raising on failure.

    'why': centralize subprocess logging and leave error handling to callers
    """

    # `display_command` lets callers hide secrets (e.g. publish tokens) from logs.
    shown = display_command if display_command else command
    _LOGGER.info("→ %s", " ".join(shown))
    environment = dict(env) if env else None
    result = subprocess.run(command, check=False, env=environment)
    return result.returncode
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _run_command_or_raise(
    command: Sequence[str],
    *,
    env: Mapping[str, str] | None = None,
    display_command: Sequence[str] | None = None,
) -> None:
    """Execute `command` and abort immediately on failure.

    'why': surface the first failing command to halt composite workflows
    """

    status = _run_command(command, env=env, display_command=display_command)
    if status == 0:
        return
    # Propagate the child's exit code to the calling shell.
    raise SystemExit(status)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _parse_release_args(argv: Sequence[str] | None) -> ReleaseOptions:
    """Parse CLI arguments into a `ReleaseOptions` instance.

    'why': isolate argparse wiring for straightforward testing
    """

    parser = argparse.ArgumentParser(prog="uv run release")
    # --version and --bump are mutually exclusive ways to pick the next release.
    exclusive = parser.add_mutually_exclusive_group()
    _ = exclusive.add_argument("--version", help="Explicit semantic version to publish")
    _ = exclusive.add_argument("--bump", choices=("patch", "minor", "major"), default="patch", help="Increment the current version")
    _ = parser.add_argument("--publish", choices=("testpypi", "pypi"), dest="repository", help="Publish artifacts after verification")
    args = sys.argv[1:] if argv is None else list(argv)
    namespace = parser.parse_args(args)
    version = cast(str | None, getattr(namespace, "version", None))
    repository = cast(str | None, getattr(namespace, "repository", None))
    bump = cast(str | None, getattr(namespace, "bump", None)) or "patch"
    return ReleaseOptions(version=version, bump=bump, repository=repository)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _determine_target_version(options: ReleaseOptions) -> str:
    """Decide the release version based on explicit input or a bump type.

    'why': ensure both pyproject and package versions stay aligned
    """

    current = _read_version(_PYPROJECT_PATH, _VERSION_PATTERN)
    # The mismatch check runs even when an explicit version was supplied,
    # so inconsistent metadata always fails loudly before the bump.
    _assert_versions_match(current)
    explicit = options.version
    return explicit if explicit else _bump_semver(current, options.bump)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _assert_versions_match(expected: str) -> None:
    """Verify the package and pyproject versions are identical before bumping.

    'why': prevent partial version updates that ship inconsistent metadata
    """

    package_version = _read_version(_PACKAGE_INIT_PATH, _INIT_VERSION_PATTERN)
    if package_version == expected:
        return
    message = " ".join(
        [
            f"Package version mismatch: pyproject.toml has {expected}",
            f"but src/netrias_client/__init__.py has {package_version}",
        ]
    )
    raise RuntimeError(message)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _read_version(path: Path, pattern: re.Pattern[str]) -> str:
|
|
173
|
+
"""Extract a version string from `path` using `pattern`.
|
|
174
|
+
|
|
175
|
+
'why': share parsing logic between pyproject and package metadata
|
|
176
|
+
"""
|
|
177
|
+
|
|
178
|
+
text = path.read_text(encoding="utf-8")
|
|
179
|
+
match = pattern.search(text)
|
|
180
|
+
if match is None:
|
|
181
|
+
raise RuntimeError(f"Could not locate version string in {path}")
|
|
182
|
+
return match.group("value")
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _bump_semver(version: str, bump: str) -> str:
|
|
186
|
+
"""Return a new semantic version string incremented by `bump` type.
|
|
187
|
+
|
|
188
|
+
'why': keep release increments predictable without external tooling
|
|
189
|
+
"""
|
|
190
|
+
|
|
191
|
+
major_str, minor_str, patch_str = version.split(".")
|
|
192
|
+
major, minor, patch = int(major_str), int(minor_str), int(patch_str)
|
|
193
|
+
if bump == "major":
|
|
194
|
+
return f"{major + 1}.0.0"
|
|
195
|
+
if bump == "minor":
|
|
196
|
+
return f"{major}.{minor + 1}.0"
|
|
197
|
+
return f"{major}.{minor}.{patch + 1}"
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _update_versions(version: str) -> None:
    """Write `version` to pyproject.toml and the package __init__ module.

    'why': guarantee distribution metadata matches the Python package
    """

    targets = (
        (_PYPROJECT_PATH, _VERSION_PATTERN, f'version = "{version}"'),
        (_PACKAGE_INIT_PATH, _INIT_VERSION_PATTERN, f'__version__ = "{version}"'),
    )
    for path, pattern, replacement in targets:
        _replace_version(path, pattern, replacement)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _replace_version(path: Path, pattern: re.Pattern[str], replacement: str) -> None:
|
|
211
|
+
"""Swap the first version match in `path` with `replacement`.
|
|
212
|
+
|
|
213
|
+
'why': avoid manual editing and keep formatting stable
|
|
214
|
+
"""
|
|
215
|
+
|
|
216
|
+
text = path.read_text(encoding="utf-8")
|
|
217
|
+
updated, count = pattern.subn(replacement, text, count=1)
|
|
218
|
+
if count != 1:
|
|
219
|
+
raise RuntimeError(f"Failed to update version in {path}")
|
|
220
|
+
_ = path.write_text(updated, encoding="utf-8")
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _build_distributions() -> list[Path]:
    """Build wheel and sdist artifacts and return their paths.

    'why': provide a clean slate before handing artifacts to verifiers
    """

    # Remove stale artifacts so the returned listing only reflects this build.
    if _DIST_PATH.exists():
        shutil.rmtree(_DIST_PATH)
    _run_command_or_raise(("uv", "build"))
    produced = _DIST_PATH.glob("*")
    return sorted(produced)
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _verify_artifacts(artifacts: Sequence[Path]) -> None:
    """Run metadata checks and a local install smoke test for `artifacts`.

    'why': catch packaging issues before publishing to a remote index
    """

    def _is_distribution(path: Path) -> bool:
        # Wheels end in .whl; sdists end in .tar.gz (two suffixes).
        return path.suffix == ".whl" or tuple(path.suffixes[-2:]) == (".tar", ".gz")

    dist_files = [path for path in artifacts if _is_distribution(path)]
    if not dist_files:
        raise RuntimeError("No distribution artifacts produced; run `uv build` first")
    twine_args = [str(path) for path in dist_files]
    _run_command_or_raise(("uv", "run", "twine", "check", *twine_args))
    _smoke_test_artifacts(dist_files)
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _smoke_test_artifacts(artifacts: Sequence[Path]) -> None:
    """Install the wheel into a temp venv and import the package.

    'why': validate that the built wheel installs and exposes metadata
    """

    wheels = [path for path in artifacts if path.suffix == ".whl"]
    if not wheels:
        raise RuntimeError("Wheel artifact missing; expected *.whl after build")
    wheel = wheels[0]
    with tempfile.TemporaryDirectory() as tmp_dir:
        env_path = Path(tmp_dir) / "venv"
        _create_virtualenv(env_path)
        python_path = _resolve_python(env_path)
        if not python_path.exists():
            raise RuntimeError(f"Smoke test interpreter missing: {python_path}")
        _LOGGER.info("smoke test interpreter → %s", python_path)
        interpreter = str(python_path)
        _run_command_or_raise((interpreter, "-m", "pip", "install", str(wheel)))
        _run_command_or_raise((interpreter, "-c", "import netrias_client as pkg; print(pkg.__version__)"))
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _create_virtualenv(target: Path) -> None:
    """Create a fresh virtual environment at `target` with pip installed.

    'why': isolate smoke tests from the developer environment
    """

    # `--seed` installs pip so the smoke test can install the wheel.
    command = ("uv", "venv", str(target), "--seed")
    _run_command_or_raise(command)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def _resolve_python(env_path: Path) -> Path:
|
|
282
|
+
"""Locate the Python executable underneath `env_path`.
|
|
283
|
+
|
|
284
|
+
'why': handle platform differences without duplicating logic
|
|
285
|
+
"""
|
|
286
|
+
|
|
287
|
+
bin_dir = env_path / ("Scripts" if sys.platform == "win32" else "bin")
|
|
288
|
+
candidates = ("python", "python3", "python.exe")
|
|
289
|
+
for candidate in candidates:
|
|
290
|
+
python_path = bin_dir / candidate
|
|
291
|
+
if python_path.exists():
|
|
292
|
+
return python_path
|
|
293
|
+
raise RuntimeError(f"Python executable not found under {bin_dir}")
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def _publish_artifacts(repository: str) -> None:
    """Publish artifacts to the requested repository via `uv publish`.

    'why': keep credential handling opinionated yet minimal
    """

    config = _REPOSITORY_CONFIG.get(repository)
    if config is None:
        raise RuntimeError(f"Unsupported repository '{repository}'")
    env_var, publish_url = config
    token = os.environ.get(env_var)
    if not token:
        raise RuntimeError(f"Set {env_var} before publishing to {repository}")
    # The display copy masks the token so it never reaches the logs.
    base = ("uv", "publish", "--username", "__token__", "--password")
    command: list[str] = [*base, token]
    display: list[str] = [*base, "******"]
    if publish_url:
        command += ["--publish-url", publish_url]
        display += ["--publish-url", publish_url]
    _run_command_or_raise(tuple(command), display_command=tuple(display))
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: netrias_client
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python client for the Netrias harmonization API
|
|
5
|
+
Project-URL: Homepage, https://github.com/netrias/netrias_client
|
|
6
|
+
Project-URL: Repository, https://github.com/netrias/netrias_client
|
|
7
|
+
Project-URL: Documentation, https://github.com/netrias/netrias_client#readme
|
|
8
|
+
Author-email: Chris Harman <charman@netrias.com>
|
|
9
|
+
License: MIT License
|
|
10
|
+
|
|
11
|
+
Copyright (c) 2025 Netrias
|
|
12
|
+
|
|
13
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
15
|
+
in the Software without restriction, including without limitation the rights
|
|
16
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
17
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
18
|
+
furnished to do so, subject to the following conditions:
|
|
19
|
+
|
|
20
|
+
The above copyright notice and this permission notice shall be included in all
|
|
21
|
+
copies or substantial portions of the Software.
|
|
22
|
+
|
|
23
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
24
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
26
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
27
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
28
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
|
+
SOFTWARE.
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Keywords: api,cde,client,harmonization,netrias
|
|
32
|
+
Classifier: Intended Audience :: Developers
|
|
33
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
34
|
+
Classifier: Operating System :: OS Independent
|
|
35
|
+
Classifier: Programming Language :: Python :: 3
|
|
36
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
40
|
+
Requires-Python: >=3.10
|
|
41
|
+
Requires-Dist: boto3
|
|
42
|
+
Requires-Dist: httpx
|
|
43
|
+
Provides-Extra: dev
|
|
44
|
+
Requires-Dist: basedpyright; extra == 'dev'
|
|
45
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
46
|
+
Requires-Dist: pytest>=7; extra == 'dev'
|
|
47
|
+
Requires-Dist: python-dotenv>=1.0; extra == 'dev'
|
|
48
|
+
Requires-Dist: ruff>=0.5.0; extra == 'dev'
|
|
49
|
+
Requires-Dist: twine>=5.0; extra == 'dev'
|
|
50
|
+
Requires-Dist: ty; extra == 'dev'
|
|
51
|
+
Requires-Dist: typing-extensions; extra == 'dev'
|
|
52
|
+
Description-Content-Type: text/markdown
|
|
53
|
+
|
|
54
|
+
# Netrias Client
|
|
55
|
+
|
|
56
|
+
"""Explain how to install and exercise the Netrias harmonization client."""
|
|
57
|
+
|
|
58
|
+
## Install with `uv`
|
|
59
|
+
- Install `uv` once (or update): `curl -LsSf https://astral.sh/uv/install.sh | sh`
|
|
60
|
+
- Sync dependencies for a project that consumes the client:
|
|
61
|
+
```bash
|
|
62
|
+
uv add netrias_client
|
|
63
|
+
uv add python-dotenv # optional helper for loading .env files
|
|
64
|
+
```
|
|
65
|
+
- Prefer `uv run <command>` for executing scripts so the managed environment is reused automatically.
|
|
66
|
+
|
|
67
|
+
### Alternative: `pip`
|
|
68
|
+
```bash
|
|
69
|
+
python -m pip install netrias_client
|
|
70
|
+
python -m pip install python-dotenv # optional
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Quickstart Script
|
|
74
|
+
Reference script (save as `main.py`) showing a full harmonization round-trip:
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
#!/usr/bin/env -S uv run python
|
|
78
|
+
# /// script
|
|
79
|
+
# requires-python = ">=3.13"
|
|
80
|
+
# dependencies = ["netrias_client", "python-dotenv"]
|
|
81
|
+
# ///
|
|
82
|
+
|
|
83
|
+
"""Exercise the packaged Netrias client against the live APIs."""
|
|
84
|
+
|
|
85
|
+
import asyncio
|
|
86
|
+
import os
|
|
87
|
+
from pathlib import Path
|
|
88
|
+
from typing import Final
|
|
89
|
+
|
|
90
|
+
from dotenv import load_dotenv
|
|
91
|
+
from netrias_client import NetriasClient, __version__ as CLIENT_VERSION
|
|
92
|
+
|
|
93
|
+
load_dotenv(override=True)
|
|
94
|
+
|
|
95
|
+
CSV_PATH: Final[Path] = Path("data/primary_diagnosis_1.csv")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
async def main() -> None:
|
|
99
|
+
client = NetriasClient()
|
|
100
|
+
client.configure(api_key=_resolve_api_key())
|
|
101
|
+
|
|
102
|
+
manifest = client.discover_cde_mapping(
|
|
103
|
+
source_csv=CSV_PATH,
|
|
104
|
+
target_schema="ccdi",
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
result = await client.harmonize_async(
|
|
108
|
+
source_path=CSV_PATH,
|
|
109
|
+
manifest=manifest,
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
print(f"netrias_client version: {CLIENT_VERSION}")
|
|
113
|
+
print(f"Harmonize status: {result.status}")
|
|
114
|
+
print(f"Harmonized file: {result.file_path}")
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _resolve_api_key() -> str:
|
|
118
|
+
api_key = os.getenv("NETRIAS_API_KEY")
|
|
119
|
+
if api_key:
|
|
120
|
+
return api_key
|
|
121
|
+
msg = "Set NETRIAS_API_KEY in your environment or .env file"
|
|
122
|
+
raise RuntimeError(msg)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
if __name__ == "__main__":
|
|
126
|
+
asyncio.run(main())
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Steps
|
|
130
|
+
1. Install or update `uv` (see above).
|
|
131
|
+
2. Export `NETRIAS_API_KEY` (or add it to a local `.env`).
|
|
132
|
+
3. Adjust `CSV_PATH` to point at the source CSV you want to harmonize.
|
|
133
|
+
4. Run `uv run python main.py`.
|
|
134
|
+
|
|
135
|
+
## `configure()` Options
|
|
136
|
+
`NetriasClient.configure(...)` accepts additional tuning knobs. You can mix and match the ones you need:
|
|
137
|
+
|
|
138
|
+
| Parameter | Type | Purpose |
|
|
139
|
+
| --- | --- | --- |
|
|
140
|
+
| `api_key` | `str` | **Required.** Bearer token for authenticating with the Netrias services. |
|
|
141
|
+
| `timeout` | `float | None` | Override the default 6-hour timeout for long-running harmonization jobs. |
|
|
142
|
+
| `log_level` | `LogLevel | str | None` | Control verbosity (`INFO` by default). Accepts enum members or string names. |
|
|
143
|
+
| `confidence_threshold` | `float | None` | Minimum score (0–1) for keeping discovery recommendations; lower it to capture more tentative matches. |
|
|
144
|
+
| `discovery_use_gateway_bypass` | `bool | None` | Toggle the temporary AWS Lambda bypass path for discovery (defaults to `True`). Set to `False` once API Gateway limits are sufficient. |
|
|
145
|
+
| `log_directory` | `Path | str | None` | Directory for per-client log files. When omitted, logs stay on stdout. |
|
|
146
|
+
|
|
147
|
+
Configure only the options you need; unspecified values fall back to sensible defaults.
|
|
148
|
+
|
|
149
|
+
## Usage Notes
|
|
150
|
+
- `discover_cde_mapping(...)` samples CSV values and returns a manifest-ready payload; use the async variant if you’re already in an event loop.
|
|
151
|
+
- Call `harmonize(...)` (sync) or `harmonize_async(...)` (async) with the manifest to download a harmonized CSV. The result object reports status, description, and the output path.
|
|
152
|
+
- The package exposes `__version__` so callers can assert the installed release.
|
|
153
|
+
- Optional extras (`netrias_client[aws]`) add boto3 helpers for the temporary gateway bypass.
|
|
154
|
+
|
|
155
|
+
## Data Model Store (Validation)
|
|
156
|
+
Query reference data for validation use cases:
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
from netrias_client import NetriasClient
|
|
160
|
+
|
|
161
|
+
client = NetriasClient()
|
|
162
|
+
client.configure(api_key="...")
|
|
163
|
+
|
|
164
|
+
# List available data models
|
|
165
|
+
models = client.list_data_models()
|
|
166
|
+
|
|
167
|
+
# List CDEs for a model version
|
|
168
|
+
cdes = client.list_cdes("ccdi", "v1")
|
|
169
|
+
|
|
170
|
+
# Validate a value against permissible values
|
|
171
|
+
is_valid = client.validate_value("Male", "ccdi", "v1", "sex_at_birth")
|
|
172
|
+
|
|
173
|
+
# Or get the full PV set for repeated lookups
|
|
174
|
+
pv_set = client.get_pv_set("ccdi", "v1", "sex_at_birth")
|
|
175
|
+
assert "Male" in pv_set
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
All methods have async variants (`list_data_models_async`, `validate_value_async`, etc.).
|